CVE-2015-8839 ext4: data corruption due to punch hole races (rhbz 1323577 1323579)

2016-04-11 09:42:23 -04:00 · 2016-04-11 09:42:23 -04:00 · 5c8bd1fb1b
parent 3d917f4360
commit 5c8bd1fb1b
5 changed files with 773 additions and 0 deletions
--- a/ext4-fix-races-between-buffered-IO-and-collapse-inse.patch
+++ b/ext4-fix-races-between-buffered-IO-and-collapse-inse.patch
@ -0,0 +1,119 @@
+From 32ebffd3bbb4162da5ff88f9a35dd32d0a28ea70 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.com>
+Date: Mon, 7 Dec 2015 14:31:11 -0500
+Subject: [PATCH 3/4] ext4: fix races between buffered IO and collapse / insert
+ range
+
+Current code implementing FALLOC_FL_COLLAPSE_RANGE and
+FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page
+faults. If buffered write or write via mmap manages to squeeze between
+filemap_write_and_wait_range() and truncate_pagecache() in the fallocate
+implementations, the written data is simply discarded by
+truncate_pagecache() although it should have been shifted.
+
+Fix the problem by moving filemap_write_and_wait_range() call inside
+i_mutex and i_mmap_sem. That way we are protected against races with
+both buffered writes and page faults.
+
+Signed-off-by: Jan Kara <jack@suse.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/extents.c | 59 +++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 31 insertions(+), 28 deletions(-)
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 65b5ada2833f..4b105c96df08 100644
+--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
+@@ -5487,21 +5487,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ 			return ret;
+ 	}
+ 
+-	/*
+-	 * Need to round down offset to be aligned with page size boundary
+-	 * for page size > block size.
+-	 */
+-	ioffset = round_down(offset, PAGE_SIZE);
+-
+-	/* Write out all dirty pages */
+-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+-					   LLONG_MAX);
+-	if (ret)
+-		return ret;
+-
+-	/* Take mutex lock */
+ 	mutex_lock(&inode->i_mutex);
+-
+ 	/*
+ 	 * There is no need to overlap collapse range with EOF, in which case
+ 	 * it is effectively a truncate operation
+@@ -5526,6 +5512,27 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ 	 * page cache.
+ 	 */
+ 	down_write(&EXT4_I(inode)->i_mmap_sem);
+	/*
+	 * Need to round down offset to be aligned with page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+	/*
+	 * Write tail of the last page before removed range since it will get
+	 * removed from the page cache below.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+	if (ret)
+		goto out_mmap;
+	/*
+	 * Write data that will be shifted to preserve them when discarding
+	 * page cache below. We are also protected from pages becoming dirty
+	 * by i_mmap_sem.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+					   LLONG_MAX);
+	if (ret)
+		goto out_mmap;
+ 	truncate_pagecache(inode, ioffset);
+ 
+ 	credits = ext4_writepage_trans_blocks(inode);
+@@ -5626,21 +5633,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ 			return ret;
+ 	}
+ 
+-	/*
+-	 * Need to round down to align start offset to page size boundary
+-	 * for page size > block size.
+-	 */
+-	ioffset = round_down(offset, PAGE_SIZE);
+-
+-	/* Write out all dirty pages */
+-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+-			LLONG_MAX);
+-	if (ret)
+-		return ret;
+-
+-	/* Take mutex lock */
+ 	mutex_lock(&inode->i_mutex);
+-
+ 	/* Currently just for extent based files */
+ 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ 		ret = -EOPNOTSUPP;
+@@ -5668,6 +5661,16 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ 	 * page cache.
+ 	 */
+ 	down_write(&EXT4_I(inode)->i_mmap_sem);
+	/*
+	 * Need to round down to align start offset to page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+	/* Write out all dirty pages */
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+			LLONG_MAX);
+	if (ret)
+		goto out_mmap;
+ 	truncate_pagecache(inode, ioffset);
+ 
+ 	credits = ext4_writepage_trans_blocks(inode);
+-- 
+2.5.5
+
--- a/ext4-fix-races-between-page-faults-and-hole-punching.patch
+++ b/ext4-fix-races-between-page-faults-and-hole-punching.patch
@ -0,0 +1,442 @@
+From ea3d7209ca01da209cda6f0dea8be9cc4b7a933b Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.com>
+Date: Mon, 7 Dec 2015 14:28:03 -0500
+Subject: [PATCH 1/4] ext4: fix races between page faults and hole punching
+
+Currently, page faults and hole punching are completely unsynchronized.
+This can result in page fault faulting in a page into a range that we
+are punching after truncate_pagecache_range() has been called and thus
+we can end up with a page mapped to disk blocks that will be shortly
+freed. Filesystem corruption will shortly follow. Note that the same
+race is avoided for truncate by checking page fault offset against
+i_size but there isn't a similar mechanism available for punching holes.
+
+Fix the problem by creating new rw semaphore i_mmap_sem in inode and
+grab it for writing over truncate, hole punching, and other functions
+removing blocks from extent tree and for read over page faults. We
+cannot easily use i_data_sem for this since that ranks below transaction
+start and we need something ranking above it so that it can be held over
+the whole truncate / hole punching operation. Also remove various
+workarounds we had in the code to reduce race window when page fault
+could have created pages with stale mapping information.
+
+Signed-off-by: Jan Kara <jack@suse.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/ext4.h     | 10 +++++++++
+ fs/ext4/extents.c  | 54 ++++++++++++++++++++++++--------------------
+ fs/ext4/file.c     | 66 ++++++++++++++++++++++++++++++++++++++++++++++--------
+ fs/ext4/inode.c    | 36 +++++++++++++++++++++--------
+ fs/ext4/super.c    |  1 +
+ fs/ext4/truncate.h |  2 ++
+ 6 files changed, 127 insertions(+), 42 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index cc7ca4e87144..348a5ff4a0e2 100644
+--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
+@@ -910,6 +910,15 @@ struct ext4_inode_info {
+ 	 * by other means, so we have i_data_sem.
+ 	 */
+ 	struct rw_semaphore i_data_sem;
+	/*
+	 * i_mmap_sem is for serializing page faults with truncate / punch hole
+	 * operations. We have to make sure that new page cannot be faulted in
+	 * a section of the inode that is being punched. We cannot easily use
+	 * i_data_sem for this since we need protection for the whole punch
+	 * operation and i_data_sem ranks below transaction start so we have
+	 * to occasionally drop it.
+	 */
+	struct rw_semaphore i_mmap_sem;
+ 	struct inode vfs_inode;
+ 	struct jbd2_inode *jinode;
+ 
+@@ -2484,6 +2493,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
+ extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
+ 			     loff_t lstart, loff_t lend);
+ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+ extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+ extern void ext4_da_update_reserve_space(struct inode *inode,
+ 					int used, int quota_claim);
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 551353b1b17a..5be9ca5a8a7a 100644
+--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
+@@ -4770,7 +4770,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ 	int partial_begin, partial_end;
+ 	loff_t start, end;
+ 	ext4_lblk_t lblk;
+-	struct address_space *mapping = inode->i_mapping;
+ 	unsigned int blkbits = inode->i_blkbits;
+ 
+ 	trace_ext4_zero_range(inode, offset, len, mode);
+@@ -4786,17 +4785,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ 	}
+ 
+ 	/*
+-	 * Write out all dirty pages to avoid race conditions
+-	 * Then release them.
+-	 */
+-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+-		ret = filemap_write_and_wait_range(mapping, offset,
+-						   offset + len - 1);
+-		if (ret)
+-			return ret;
+-	}
+-
+-	/*
+ 	 * Round up offset. This is not fallocate, we neet to zero out
+ 	 * blocks, so convert interior block aligned part of the range to
+ 	 * unwritten and possibly manually zero out unaligned parts of the
+@@ -4856,16 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ 		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+ 			  EXT4_EX_NOCACHE);
+ 
+-		/* Now release the pages and zero block aligned part of pages*/
+-		truncate_pagecache_range(inode, start, end - 1);
+-		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+-
+ 		/* Wait all existing dio workers, newcomers will block on i_mutex */
+ 		ext4_inode_block_unlocked_dio(inode);
+ 		inode_dio_wait(inode);
+ 
+		/*
+		 * Prevent page faults from reinstantiating pages we have
+		 * released from page cache.
+		 */
+		down_write(&EXT4_I(inode)->i_mmap_sem);
+		/* Now release the pages and zero block aligned part of pages */
+		truncate_pagecache_range(inode, start, end - 1);
+		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+
+ 		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ 					     flags, mode);
+		up_write(&EXT4_I(inode)->i_mmap_sem);
+ 		if (ret)
+ 			goto out_dio;
+ 	}
+@@ -5524,17 +5518,22 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ 		goto out_mutex;
+ 	}
+ 
+-	truncate_pagecache(inode, ioffset);
+-
+ 	/* Wait for existing dio to complete */
+ 	ext4_inode_block_unlocked_dio(inode);
+ 	inode_dio_wait(inode);
+ 
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+	truncate_pagecache(inode, ioffset);
+
+ 	credits = ext4_writepage_trans_blocks(inode);
+ 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+ 	if (IS_ERR(handle)) {
+ 		ret = PTR_ERR(handle);
+-		goto out_dio;
+		goto out_mmap;
+ 	}
+ 
+ 	down_write(&EXT4_I(inode)->i_data_sem);
+@@ -5573,7 +5572,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ 
+ out_stop:
+ 	ext4_journal_stop(handle);
+-out_dio:
+out_mmap:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
+ 	ext4_inode_resume_unlocked_dio(inode);
+ out_mutex:
+ 	mutex_unlock(&inode->i_mutex);
+@@ -5660,17 +5660,22 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ 		goto out_mutex;
+ 	}
+ 
+-	truncate_pagecache(inode, ioffset);
+-
+ 	/* Wait for existing dio to complete */
+ 	ext4_inode_block_unlocked_dio(inode);
+ 	inode_dio_wait(inode);
+ 
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+	truncate_pagecache(inode, ioffset);
+
+ 	credits = ext4_writepage_trans_blocks(inode);
+ 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
+ 	if (IS_ERR(handle)) {
+ 		ret = PTR_ERR(handle);
+-		goto out_dio;
+		goto out_mmap;
+ 	}
+ 
+ 	/* Expand file to avoid data loss if there is error while shifting */
+@@ -5741,7 +5746,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ 
+ out_stop:
+ 	ext4_journal_stop(handle);
+-out_dio:
+out_mmap:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
+ 	ext4_inode_resume_unlocked_dio(inode);
+ out_mutex:
+ 	mutex_unlock(&inode->i_mutex);
+diff --git a/fs/ext4/file.c b/fs/ext4/file.c
+index 113837e7ba98..0d24ebcd7c9e 100644
+--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
+@@ -209,15 +209,18 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
+ 	int result;
+ 	handle_t *handle = NULL;
+-	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
+ 	bool write = vmf->flags & FAULT_FLAG_WRITE;
+ 
+ 	if (write) {
+ 		sb_start_pagefault(sb);
+ 		file_update_time(vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
+ 		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+ 						EXT4_DATA_TRANS_BLOCKS(sb));
+-	}
+	} else
+		down_read(&EXT4_I(inode)->i_mmap_sem);
+ 
+ 	if (IS_ERR(handle))
+ 		result = VM_FAULT_SIGBUS;
+@@ -228,8 +231,10 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ 	if (write) {
+ 		if (!IS_ERR(handle))
+ 			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
+ 		sb_end_pagefault(sb);
+-	}
+	} else
+		up_read(&EXT4_I(inode)->i_mmap_sem);
+ 
+ 	return result;
+ }
+@@ -246,10 +251,12 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+ 	if (write) {
+ 		sb_start_pagefault(sb);
+ 		file_update_time(vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
+ 		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+ 				ext4_chunk_trans_blocks(inode,
+ 							PMD_SIZE / PAGE_SIZE));
+-	}
+	} else
+		down_read(&EXT4_I(inode)->i_mmap_sem);
+ 
+ 	if (IS_ERR(handle))
+ 		result = VM_FAULT_SIGBUS;
+@@ -260,30 +267,71 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+ 	if (write) {
+ 		if (!IS_ERR(handle))
+ 			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
+ 		sb_end_pagefault(sb);
+-	}
+	} else
+		up_read(&EXT4_I(inode)->i_mmap_sem);
+ 
+ 	return result;
+ }
+ 
+ static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
+-	return dax_mkwrite(vma, vmf, ext4_get_block_dax,
+-				ext4_end_io_unwritten);
+	int err;
+	struct inode *inode = file_inode(vma->vm_file);
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(vma->vm_file);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	err = __dax_mkwrite(vma, vmf, ext4_get_block_dax,
+			    ext4_end_io_unwritten);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	sb_end_pagefault(inode->i_sb);
+
+	return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races against truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+				struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
+	int ret = VM_FAULT_NOPAGE;
+	loff_t size;
+
+	sb_start_pagefault(sb);
+	file_update_time(vma->vm_file);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (vmf->pgoff >= size)
+		ret = VM_FAULT_SIGBUS;
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	sb_end_pagefault(sb);
+
+	return ret;
+ }
+ 
+ static const struct vm_operations_struct ext4_dax_vm_ops = {
+ 	.fault		= ext4_dax_fault,
+ 	.pmd_fault	= ext4_dax_pmd_fault,
+ 	.page_mkwrite	= ext4_dax_mkwrite,
+-	.pfn_mkwrite	= dax_pfn_mkwrite,
+	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
+ };
+ #else
+ #define ext4_dax_vm_ops	ext4_file_vm_ops
+ #endif
+ 
+ static const struct vm_operations_struct ext4_file_vm_ops = {
+-	.fault		= filemap_fault,
+	.fault		= ext4_filemap_fault,
+ 	.map_pages	= filemap_map_pages,
+ 	.page_mkwrite   = ext4_page_mkwrite,
+ };
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index ea433a7f4bca..d1207d03c961 100644
+--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
+@@ -3623,6 +3623,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+ 
+ 	}
+ 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+ 	first_block_offset = round_up(offset, sb->s_blocksize);
+ 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
+ 
+@@ -3631,10 +3640,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+ 		truncate_pagecache_range(inode, first_block_offset,
+ 					 last_block_offset);
+ 
+-	/* Wait all existing dio workers, newcomers will block on i_mutex */
+-	ext4_inode_block_unlocked_dio(inode);
+-	inode_dio_wait(inode);
+-
+ 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+ 		credits = ext4_writepage_trans_blocks(inode);
+ 	else
+@@ -3680,16 +3685,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+ 	if (IS_SYNC(inode))
+ 		ext4_handle_sync(handle);
+ 
+-	/* Now release the pages again to reduce race window */
+-	if (last_block_offset > first_block_offset)
+-		truncate_pagecache_range(inode, first_block_offset,
+-					 last_block_offset);
+-
+ 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ 	ext4_mark_inode_dirty(handle, inode);
+ out_stop:
+ 	ext4_journal_stop(handle);
+ out_dio:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
+ 	ext4_inode_resume_unlocked_dio(inode);
+ out_mutex:
+ 	mutex_unlock(&inode->i_mutex);
+@@ -4823,6 +4824,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
+ 			} else
+ 				ext4_wait_for_tail_page_commit(inode);
+ 		}
+		down_write(&EXT4_I(inode)->i_mmap_sem);
+ 		/*
+ 		 * Truncate pagecache after we've waited for commit
+ 		 * in data=journal mode to make pages freeable.
+@@ -4830,6 +4832,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
+ 		truncate_pagecache(inode, inode->i_size);
+ 		if (shrink)
+ 			ext4_truncate(inode);
+		up_write(&EXT4_I(inode)->i_mmap_sem);
+ 	}
+ 
+ 	if (!rc) {
+@@ -5278,6 +5281,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ 
+ 	sb_start_pagefault(inode->i_sb);
+ 	file_update_time(vma->vm_file);
+
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+ 	/* Delalloc case is easy... */
+ 	if (test_opt(inode->i_sb, DELALLOC) &&
+ 	    !ext4_should_journal_data(inode) &&
+@@ -5347,6 +5352,19 @@ retry_alloc:
+ out_ret:
+ 	ret = block_page_mkwrite_return(ret);
+ out:
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+ 	sb_end_pagefault(inode->i_sb);
+ 	return ret;
+ }
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	int err;
+
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	err = filemap_fault(vma, vmf);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+
+	return err;
+}
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index c9ab67da6e5a..493370e6590e 100644
+--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
+@@ -958,6 +958,7 @@ static void init_once(void *foo)
+ 	INIT_LIST_HEAD(&ei->i_orphan);
+ 	init_rwsem(&ei->xattr_sem);
+ 	init_rwsem(&ei->i_data_sem);
+	init_rwsem(&ei->i_mmap_sem);
+ 	inode_init_once(&ei->vfs_inode);
+ }
+ 
+diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
+index 011ba6670d99..c70d06a383e2 100644
+--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
+@@ -10,8 +10,10 @@
+  */
+ static inline void ext4_truncate_failed_write(struct inode *inode)
+ {
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+ 	truncate_inode_pages(inode->i_mapping, inode->i_size);
+ 	ext4_truncate(inode);
+	up_write(&EXT4_I(inode)->i_mmap_sem);
+ }
+ 
+ /*
+-- 
+2.5.5
+
--- a/ext4-fix-races-of-writeback-with-punch-hole-and-zero.patch
+++ b/ext4-fix-races-of-writeback-with-punch-hole-and-zero.patch
@ -0,0 +1,110 @@
+From 011278485ecc3cd2a3954b5d4c73101d919bf1fa Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.com>
+Date: Mon, 7 Dec 2015 14:34:49 -0500
+Subject: [PATCH 4/4] ext4: fix races of writeback with punch hole and zero
+ range
+
+When doing delayed allocation, update of on-disk inode size is postponed
+until IO submission time. However hole punch or zero range fallocate
+calls can end up discarding the tail page cache page and thus on-disk
+inode size would never be properly updated.
+
+Make sure the on-disk inode size is updated before truncating page
+cache.
+
+Signed-off-by: Jan Kara <jack@suse.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/ext4.h    |  3 +++
+ fs/ext4/extents.c |  5 +++++
+ fs/ext4/inode.c   | 35 ++++++++++++++++++++++++++++++++++-
+ 3 files changed, 42 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 348a5ff4a0e2..80f76f092079 100644
+--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
+@@ -2858,6 +2858,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
+ 	return changed;
+ }
+ 
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len);
+
+ struct ext4_group_info {
+ 	unsigned long   bb_state;
+ 	struct rb_root  bb_free_root;
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 4b105c96df08..3578b25fccfd 100644
+--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
+@@ -4847,6 +4847,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ 		 * released from page cache.
+ 		 */
+ 		down_write(&EXT4_I(inode)->i_mmap_sem);
+		ret = ext4_update_disksize_before_punch(inode, offset, len);
+		if (ret) {
+			up_write(&EXT4_I(inode)->i_mmap_sem);
+			goto out_dio;
+		}
+ 		/* Now release the pages and zero block aligned part of pages */
+ 		truncate_pagecache_range(inode, start, end - 1);
+ 		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index d1207d03c961..472e608da13d 100644
+--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
+@@ -3559,6 +3559,35 @@ int ext4_can_truncate(struct inode *inode)
+ }
+ 
+ /*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len)
+{
+	handle_t *handle;
+	loff_t size = i_size_read(inode);
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	if (offset > size || offset + len < size)
+		return 0;
+
+	if (EXT4_I(inode)->i_disksize >= size)
+		return 0;
+
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ext4_update_i_disksize(inode, size);
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+	return 0;
+}
+
+/*
+  * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+  * associated with the given offset and length
+  *
+@@ -3636,9 +3665,13 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+ 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
+ 
+ 	/* Now release the pages and zero block aligned part of pages*/
+-	if (last_block_offset > first_block_offset)
+	if (last_block_offset > first_block_offset) {
+		ret = ext4_update_disksize_before_punch(inode, offset, length);
+		if (ret)
+			goto out_dio;
+ 		truncate_pagecache_range(inode, first_block_offset,
+ 					 last_block_offset);
+	}
+ 
+ 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+ 		credits = ext4_writepage_trans_blocks(inode);
+-- 
+2.5.5
+
--- a/ext4-move-unlocked-dio-protection-from-ext4_alloc_fi.patch
+++ b/ext4-move-unlocked-dio-protection-from-ext4_alloc_fi.patch
@ -0,0 +1,93 @@
+From 17048e8a083fec7ad841d88ef0812707fbc7e39f Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.com>
+Date: Mon, 7 Dec 2015 14:29:17 -0500
+Subject: [PATCH 2/4] ext4: move unlocked dio protection from
+ ext4_alloc_file_blocks()
+
+Currently ext4_alloc_file_blocks() was handling protection against
+unlocked DIO. However we now need to sometimes call it under i_mmap_sem
+and sometimes not and DIO protection ranks above it (although strictly
+speaking this cannot currently create any deadlocks). Also
+ext4_zero_range() was actually getting & releasing unlocked DIO
+protection twice in some cases. Luckily it didn't introduce any real bug
+but it was a land mine waiting to be stepped on.  So move DIO protection
+out from ext4_alloc_file_blocks() into the two callsites.
+
+Signed-off-by: Jan Kara <jack@suse.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/extents.c | 21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 5be9ca5a8a7a..65b5ada2833f 100644
+--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
+@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
+ 	if (len <= EXT_UNWRITTEN_MAX_LEN)
+ 		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
+ 
+-	/* Wait all existing dio workers, newcomers will block on i_mutex */
+-	ext4_inode_block_unlocked_dio(inode);
+-	inode_dio_wait(inode);
+-
+ 	/*
+ 	 * credits to insert 1 extent into extent tree
+ 	 */
+@@ -4752,8 +4748,6 @@ retry:
+ 		goto retry;
+ 	}
+ 
+-	ext4_inode_resume_unlocked_dio(inode);
+-
+ 	return ret > 0 ? ret2 : ret;
+ }
+ 
+@@ -4827,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ 	if (mode & FALLOC_FL_KEEP_SIZE)
+ 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+ 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
+ 	/* Preallocate the range including the unaligned edges */
+ 	if (partial_begin || partial_end) {
+ 		ret = ext4_alloc_file_blocks(file,
+@@ -4835,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ 				 round_down(offset, 1 << blkbits)) >> blkbits,
+ 				new_size, flags, mode);
+ 		if (ret)
+-			goto out_mutex;
+			goto out_dio;
+ 
+ 	}
+ 
+@@ -4844,10 +4842,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
+ 		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+ 			  EXT4_EX_NOCACHE);
+ 
+-		/* Wait all existing dio workers, newcomers will block on i_mutex */
+-		ext4_inode_block_unlocked_dio(inode);
+-		inode_dio_wait(inode);
+-
+ 		/*
+ 		 * Prevent page faults from reinstantiating pages we have
+ 		 * released from page cache.
+@@ -4992,8 +4986,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+ 			goto out;
+ 	}
+ 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
+ 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+ 				     flags, mode);
+	ext4_inode_resume_unlocked_dio(inode);
+ 	if (ret)
+ 		goto out;
+ 
+-- 
+2.5.5
+
--- a/kernel.spec
+++ b/kernel.spec
@ -681,6 +681,12 @@ Patch689: x86-iopl-64-Properly-context-switch-IOPL-on-Xen-PV.patch
 # CVE-2016-3672 rhbz 1324749 1324750
 Patch690: x86-mm-32-Enable-full-randomization-on-i386-and-X86_.patch

+# CVE-2015-8839 rhbz 1323577 1323579
+Patch691: ext4-fix-races-between-page-faults-and-hole-punching.patch
+Patch692: ext4-move-unlocked-dio-protection-from-ext4_alloc_fi.patch
+Patch693: ext4-fix-races-between-buffered-IO-and-collapse-inse.patch
+Patch694: ext4-fix-races-of-writeback-with-punch-hole-and-zero.patch
+
 # END OF PATCH DEFINITIONS
 %endif

@ -2124,6 +2130,9 @@ fi
 #
 # 
 %changelog
+* Mon Apr 11 2016 Josh Boyer <jwboyer@fedoraproject.org>
+- CVE-2015-8839 ext4: data corruption due to punch hole races (rhbz 1323577 1323579)
+
 * Thu Apr 07 2016 Justin M. Forbes <jforbes@fedoraproject.org>
 - Enable Full Randomization on 32bit x86 CVE-2016-3672 (rhbz 1324749 1324750)