Apply patches from Jeff Moyer to fix direct-io oops (rhbz 812129)
This commit is contained in:
parent
e7887c8a48
commit
c9be30821b
|
@ -0,0 +1,214 @@
|
|||
Fix a crash when block device is read and block size is changed at the same time
|
||||
|
||||
commit b87570f5d349661814b262dd5fc40787700f80d6
|
||||
Author: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Date: Wed Sep 26 07:46:40 2012 +0200
|
||||
|
||||
Fix a crash when block device is read and block size is changed at the same time
|
||||
|
||||
The kernel may crash when block size is changed and I/O is issued
|
||||
simultaneously.
|
||||
|
||||
Because some subsystems (udev or lvm) may read any block device anytime,
|
||||
the bug actually puts any code that changes a block device size in
|
||||
jeopardy.
|
||||
|
||||
The crash can be reproduced if you insert "msleep(1000)" into
|
||||
blkdev_get_blocks just before "bh->b_size = max_blocks <<
|
||||
inode->i_blkbits;".
|
||||
Then, run "dd if=/dev/ram0 of=/dev/null bs=4k count=1 iflag=direct"
|
||||
While it is waiting in msleep, run "blockdev --setbsz 2048 /dev/ram0"
|
||||
You get a BUG.
|
||||
|
||||
The direct and non-direct I/O is written with the assumption that block
|
||||
size does not change. It doesn't seem practical to fix these crashes
|
||||
one-by-one; there may be many crash possibilities when block size changes
|
||||
at a certain place and it is impossible to find them all and verify the
|
||||
code.
|
||||
|
||||
This patch introduces a new rw-lock bd_block_size_semaphore. The lock is
|
||||
taken for read during I/O. It is taken for write when changing block
|
||||
size. Consequently, block size can't be changed while I/O is being
|
||||
submitted.
|
||||
|
||||
For asynchronous I/O, the patch only prevents block size change while
|
||||
the I/O is being submitted. The block size can change when the I/O is in
|
||||
progress or when the I/O is being finished. This is acceptable because
|
||||
there are no accesses to block size when asynchronous I/O is being
|
||||
finished.
|
||||
|
||||
The patch prevents block size changing while the device is mapped with
|
||||
mmap.
|
||||
|
||||
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
|
||||
Index: linux-3.6.x86_64/drivers/char/raw.c
|
||||
===================================================================
|
||||
--- linux-3.6.x86_64.orig/drivers/char/raw.c 2012-11-16 17:12:35.127010280 -0500
|
||||
+++ linux-3.6.x86_64/drivers/char/raw.c 2012-11-16 17:12:37.381002516 -0500
|
||||
@@ -285,7 +285,7 @@
|
||||
|
||||
static const struct file_operations raw_fops = {
|
||||
.read = do_sync_read,
|
||||
- .aio_read = generic_file_aio_read,
|
||||
+ .aio_read = blkdev_aio_read,
|
||||
.write = do_sync_write,
|
||||
.aio_write = blkdev_aio_write,
|
||||
.fsync = blkdev_fsync,
|
||||
Index: linux-3.6.x86_64/fs/block_dev.c
|
||||
===================================================================
|
||||
--- linux-3.6.x86_64.orig/fs/block_dev.c 2012-11-16 17:12:35.127010280 -0500
|
||||
+++ linux-3.6.x86_64/fs/block_dev.c 2012-11-16 17:12:37.381002516 -0500
|
||||
@@ -116,6 +116,8 @@
|
||||
|
||||
int set_blocksize(struct block_device *bdev, int size)
|
||||
{
|
||||
+ struct address_space *mapping;
|
||||
+
|
||||
/* Size must be a power of two, and between 512 and PAGE_SIZE */
|
||||
if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
|
||||
return -EINVAL;
|
||||
@@ -124,6 +126,20 @@
|
||||
if (size < bdev_logical_block_size(bdev))
|
||||
return -EINVAL;
|
||||
|
||||
+ /* Prevent starting I/O or mapping the device */
|
||||
+ down_write(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ /* Check that the block device is not memory mapped */
|
||||
+ mapping = bdev->bd_inode->i_mapping;
|
||||
+ mutex_lock(&mapping->i_mmap_mutex);
|
||||
+ if (!prio_tree_empty(&mapping->i_mmap) ||
|
||||
+ !list_empty(&mapping->i_mmap_nonlinear)) {
|
||||
+ mutex_unlock(&mapping->i_mmap_mutex);
|
||||
+ up_write(&bdev->bd_block_size_semaphore);
|
||||
+ return -EBUSY;
|
||||
+ }
|
||||
+ mutex_unlock(&mapping->i_mmap_mutex);
|
||||
+
|
||||
/* Don't change the size if it is same as current */
|
||||
if (bdev->bd_block_size != size) {
|
||||
sync_blockdev(bdev);
|
||||
@@ -131,6 +147,9 @@
|
||||
bdev->bd_inode->i_blkbits = blksize_bits(size);
|
||||
kill_bdev(bdev);
|
||||
}
|
||||
+
|
||||
+ up_write(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -472,6 +491,7 @@
|
||||
inode_init_once(&ei->vfs_inode);
|
||||
/* Initialize mutex for freeze. */
|
||||
mutex_init(&bdev->bd_fsfreeze_mutex);
|
||||
+ init_rwsem(&bdev->bd_block_size_semaphore);
|
||||
}
|
||||
|
||||
static inline void __bd_forget(struct inode *inode)
|
||||
@@ -1567,6 +1587,22 @@
|
||||
return blkdev_ioctl(bdev, mode, cmd, arg);
|
||||
}
|
||||
|
||||
+ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
+ unsigned long nr_segs, loff_t pos)
|
||||
+{
|
||||
+ ssize_t ret;
|
||||
+ struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
|
||||
+
|
||||
+ down_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
|
||||
+
|
||||
+ up_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(blkdev_aio_read);
|
||||
+
|
||||
/*
|
||||
* Write data to the block device. Only intended for the block device itself
|
||||
* and the raw driver which basically is a fake block device.
|
||||
@@ -1578,12 +1614,16 @@
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
+ struct block_device *bdev = I_BDEV(file->f_mapping->host);
|
||||
struct blk_plug plug;
|
||||
ssize_t ret;
|
||||
|
||||
BUG_ON(iocb->ki_pos != pos);
|
||||
|
||||
blk_start_plug(&plug);
|
||||
+
|
||||
+ down_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
|
||||
if (ret > 0 || ret == -EIOCBQUEUED) {
|
||||
ssize_t err;
|
||||
@@ -1592,11 +1632,29 @@
|
||||
if (err < 0 && ret > 0)
|
||||
ret = err;
|
||||
}
|
||||
+
|
||||
+ up_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
blk_finish_plug(&plug);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkdev_aio_write);
|
||||
|
||||
+int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
+{
|
||||
+ int ret;
|
||||
+ struct block_device *bdev = I_BDEV(file->f_mapping->host);
|
||||
+
|
||||
+ down_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ ret = generic_file_mmap(file, vma);
|
||||
+
|
||||
+ up_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Try to release a page associated with block device when the system
|
||||
* is under memory pressure.
|
||||
@@ -1627,9 +1685,9 @@
|
||||
.llseek = block_llseek,
|
||||
.read = do_sync_read,
|
||||
.write = do_sync_write,
|
||||
- .aio_read = generic_file_aio_read,
|
||||
+ .aio_read = blkdev_aio_read,
|
||||
.aio_write = blkdev_aio_write,
|
||||
- .mmap = generic_file_mmap,
|
||||
+ .mmap = blkdev_mmap,
|
||||
.fsync = blkdev_fsync,
|
||||
.unlocked_ioctl = block_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
Index: linux-3.6.x86_64/include/linux/fs.h
|
||||
===================================================================
|
||||
--- linux-3.6.x86_64.orig/include/linux/fs.h 2012-11-16 17:12:35.127010280 -0500
|
||||
+++ linux-3.6.x86_64/include/linux/fs.h 2012-11-16 17:12:37.424002387 -0500
|
||||
@@ -724,6 +724,8 @@
|
||||
int bd_fsfreeze_count;
|
||||
/* Mutex for freeze */
|
||||
struct mutex bd_fsfreeze_mutex;
|
||||
+ /* A semaphore that prevents I/O while block size is being changed */
|
||||
+ struct rw_semaphore bd_block_size_semaphore;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -2564,6 +2566,8 @@
|
||||
unsigned long *nr_segs, size_t *count, int access_flags);
|
||||
|
||||
/* fs/block_dev.c */
|
||||
+extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
+ unsigned long nr_segs, loff_t pos);
|
||||
extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos);
|
||||
extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
|
|
@ -0,0 +1,290 @@
|
|||
blockdev: turn a rw semaphore into a percpu rw semaphore
|
||||
|
||||
commit 62ac665ff9fc07497ca524bd20d6a96893d11071
|
||||
Author: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Date: Wed Sep 26 07:46:43 2012 +0200
|
||||
|
||||
blockdev: turn a rw semaphore into a percpu rw semaphore
|
||||
|
||||
This avoids cache line bouncing when many processes lock the semaphore
|
||||
for read.
|
||||
|
||||
New percpu lock implementation
|
||||
|
||||
The lock consists of an array of percpu unsigned integers, a boolean
|
||||
variable and a mutex.
|
||||
|
||||
When we take the lock for read, we enter rcu read section, check for a
|
||||
"locked" variable. If it is false, we increase a percpu counter on the
|
||||
current cpu and exit the rcu section. If "locked" is true, we exit the
|
||||
rcu section, take the mutex and drop it (this waits until a writer
|
||||
finished) and retry.
|
||||
|
||||
Unlocking for read just decreases percpu variable. Note that we can
|
||||
unlock on a different cpu than where we locked, in this case the
|
||||
counter underflows. The sum of all percpu counters represents the number
|
||||
of processes that hold the lock for read.
|
||||
|
||||
When we need to lock for write, we take the mutex, set "locked" variable
|
||||
to true and synchronize rcu. Since RCU has been synchronized, no
|
||||
processes can create new read locks. We wait until the sum of percpu
|
||||
counters is zero - when it is, there are no readers in the critical
|
||||
section.
|
||||
|
||||
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
|
||||
Index: linux-3.6.x86_64/Documentation/percpu-rw-semaphore.txt
|
||||
===================================================================
|
||||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||||
+++ linux-3.6.x86_64/Documentation/percpu-rw-semaphore.txt 2012-11-16 17:12:57.351936583 -0500
|
||||
@@ -0,0 +1,27 @@
|
||||
+Percpu rw semaphores
|
||||
+--------------------
|
||||
+
|
||||
+Percpu rw semaphores is a new read-write semaphore design that is
|
||||
+optimized for locking for reading.
|
||||
+
|
||||
+The problem with traditional read-write semaphores is that when multiple
|
||||
+cores take the lock for reading, the cache line containing the semaphore
|
||||
+is bouncing between L1 caches of the cores, causing performance
|
||||
+degradation.
|
||||
+
|
||||
+Locking for reading is very fast, it uses RCU and it avoids any atomic
|
||||
+instruction in the lock and unlock path. On the other hand, locking for
|
||||
+writing is very expensive, it calls synchronize_rcu() that can take
|
||||
+hundreds of microseconds.
|
||||
+
|
||||
+The lock is declared with "struct percpu_rw_semaphore" type.
|
||||
+The lock is initialized with percpu_init_rwsem, it returns 0 on success and
|
||||
+-ENOMEM on allocation failure.
|
||||
+The lock must be freed with percpu_free_rwsem to avoid memory leak.
|
||||
+
|
||||
+The lock is locked for read with percpu_down_read, percpu_up_read and
|
||||
+for write with percpu_down_write, percpu_up_write.
|
||||
+
|
||||
+The idea of using RCU for optimized rw-lock was introduced by
|
||||
+Eric Dumazet <eric.dumazet@gmail.com>.
|
||||
+The code was written by Mikulas Patocka <mpatocka@redhat.com>
|
||||
Index: linux-3.6.x86_64/fs/block_dev.c
|
||||
===================================================================
|
||||
--- linux-3.6.x86_64.orig/fs/block_dev.c 2012-11-16 17:12:37.381002516 -0500
|
||||
+++ linux-3.6.x86_64/fs/block_dev.c 2012-11-16 17:27:41.217005828 -0500
|
||||
@@ -127,7 +127,7 @@
|
||||
return -EINVAL;
|
||||
|
||||
/* Prevent starting I/O or mapping the device */
|
||||
- down_write(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_down_write(&bdev->bd_block_size_semaphore);
|
||||
|
||||
/* Check that the block device is not memory mapped */
|
||||
mapping = bdev->bd_inode->i_mapping;
|
||||
@@ -135,7 +135,7 @@
|
||||
if (!prio_tree_empty(&mapping->i_mmap) ||
|
||||
!list_empty(&mapping->i_mmap_nonlinear)) {
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
- up_write(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_up_write(&bdev->bd_block_size_semaphore);
|
||||
return -EBUSY;
|
||||
}
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
@@ -148,7 +148,7 @@
|
||||
kill_bdev(bdev);
|
||||
}
|
||||
|
||||
- up_write(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_up_write(&bdev->bd_block_size_semaphore);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -460,6 +460,12 @@
|
||||
struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
|
||||
if (!ei)
|
||||
return NULL;
|
||||
+
|
||||
+ if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
|
||||
+ kmem_cache_free(bdev_cachep, ei);
|
||||
+ return NULL;
|
||||
+ }
|
||||
+
|
||||
return &ei->vfs_inode;
|
||||
}
|
||||
|
||||
@@ -468,6 +474,8 @@
|
||||
struct inode *inode = container_of(head, struct inode, i_rcu);
|
||||
struct bdev_inode *bdi = BDEV_I(inode);
|
||||
|
||||
+ percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
|
||||
+
|
||||
kmem_cache_free(bdev_cachep, bdi);
|
||||
}
|
||||
|
||||
@@ -491,7 +499,6 @@
|
||||
inode_init_once(&ei->vfs_inode);
|
||||
/* Initialize mutex for freeze. */
|
||||
mutex_init(&bdev->bd_fsfreeze_mutex);
|
||||
- init_rwsem(&bdev->bd_block_size_semaphore);
|
||||
}
|
||||
|
||||
static inline void __bd_forget(struct inode *inode)
|
||||
@@ -1593,11 +1600,11 @@
|
||||
ssize_t ret;
|
||||
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
|
||||
|
||||
- down_read(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_down_read(&bdev->bd_block_size_semaphore);
|
||||
|
||||
ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
|
||||
|
||||
- up_read(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_up_read(&bdev->bd_block_size_semaphore);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1622,7 +1629,7 @@
|
||||
|
||||
blk_start_plug(&plug);
|
||||
|
||||
- down_read(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_down_read(&bdev->bd_block_size_semaphore);
|
||||
|
||||
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
|
||||
if (ret > 0 || ret == -EIOCBQUEUED) {
|
||||
@@ -1633,7 +1640,7 @@
|
||||
ret = err;
|
||||
}
|
||||
|
||||
- up_read(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_up_read(&bdev->bd_block_size_semaphore);
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
@@ -1646,11 +1653,11 @@
|
||||
int ret;
|
||||
struct block_device *bdev = I_BDEV(file->f_mapping->host);
|
||||
|
||||
- down_read(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_down_read(&bdev->bd_block_size_semaphore);
|
||||
|
||||
ret = generic_file_mmap(file, vma);
|
||||
|
||||
- up_read(&bdev->bd_block_size_semaphore);
|
||||
+ percpu_up_read(&bdev->bd_block_size_semaphore);
|
||||
|
||||
return ret;
|
||||
}
|
||||
Index: linux-3.6.x86_64/include/linux/fs.h
|
||||
===================================================================
|
||||
--- linux-3.6.x86_64.orig/include/linux/fs.h 2012-11-16 17:12:37.424002387 -0500
|
||||
+++ linux-3.6.x86_64/include/linux/fs.h 2012-11-16 17:28:12.578901349 -0500
|
||||
@@ -415,6 +415,7 @@
|
||||
#include <linux/migrate_mode.h>
|
||||
#include <linux/uidgid.h>
|
||||
#include <linux/lockdep.h>
|
||||
+#include <linux/percpu-rwsem.h>
|
||||
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
@@ -725,7 +726,7 @@
|
||||
/* Mutex for freeze */
|
||||
struct mutex bd_fsfreeze_mutex;
|
||||
/* A semaphore that prevents I/O while block size is being changed */
|
||||
- struct rw_semaphore bd_block_size_semaphore;
|
||||
+ struct percpu_rw_semaphore bd_block_size_semaphore;
|
||||
};
|
||||
|
||||
/*
|
||||
Index: linux-3.6.x86_64/include/linux/percpu-rwsem.h
|
||||
===================================================================
|
||||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
||||
+++ linux-3.6.x86_64/include/linux/percpu-rwsem.h 2012-11-16 17:12:57.354936574 -0500
|
||||
@@ -0,0 +1,89 @@
|
||||
+#ifndef _LINUX_PERCPU_RWSEM_H
|
||||
+#define _LINUX_PERCPU_RWSEM_H
|
||||
+
|
||||
+#include <linux/mutex.h>
|
||||
+#include <linux/percpu.h>
|
||||
+#include <linux/rcupdate.h>
|
||||
+#include <linux/delay.h>
|
||||
+
|
||||
+struct percpu_rw_semaphore {
|
||||
+ unsigned __percpu *counters;
|
||||
+ bool locked;
|
||||
+ struct mutex mtx;
|
||||
+};
|
||||
+
|
||||
+static inline void percpu_down_read(struct percpu_rw_semaphore *p)
|
||||
+{
|
||||
+ rcu_read_lock();
|
||||
+ if (unlikely(p->locked)) {
|
||||
+ rcu_read_unlock();
|
||||
+ mutex_lock(&p->mtx);
|
||||
+ this_cpu_inc(*p->counters);
|
||||
+ mutex_unlock(&p->mtx);
|
||||
+ return;
|
||||
+ }
|
||||
+ this_cpu_inc(*p->counters);
|
||||
+ rcu_read_unlock();
|
||||
+}
|
||||
+
|
||||
+static inline void percpu_up_read(struct percpu_rw_semaphore *p)
|
||||
+{
|
||||
+ /*
|
||||
+ * On X86, write operation in this_cpu_dec serves as a memory unlock
|
||||
+ * barrier (i.e. memory accesses may be moved before the write, but
|
||||
+ * no memory accesses are moved past the write).
|
||||
+ * On other architectures this may not be the case, so we need smp_mb()
|
||||
+ * there.
|
||||
+ */
|
||||
+#if defined(CONFIG_X86) && (!defined(CONFIG_X86_PPRO_FENCE) && !defined(CONFIG_X86_OOSTORE))
|
||||
+ barrier();
|
||||
+#else
|
||||
+ smp_mb();
|
||||
+#endif
|
||||
+ this_cpu_dec(*p->counters);
|
||||
+}
|
||||
+
|
||||
+static inline unsigned __percpu_count(unsigned __percpu *counters)
|
||||
+{
|
||||
+ unsigned total = 0;
|
||||
+ int cpu;
|
||||
+
|
||||
+ for_each_possible_cpu(cpu)
|
||||
+ total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
|
||||
+
|
||||
+ return total;
|
||||
+}
|
||||
+
|
||||
+static inline void percpu_down_write(struct percpu_rw_semaphore *p)
|
||||
+{
|
||||
+ mutex_lock(&p->mtx);
|
||||
+ p->locked = true;
|
||||
+ synchronize_rcu();
|
||||
+ while (__percpu_count(p->counters))
|
||||
+ msleep(1);
|
||||
+ smp_rmb(); /* paired with smp_mb() in percpu_up_read() */
|
||||
+}
|
||||
+
|
||||
+static inline void percpu_up_write(struct percpu_rw_semaphore *p)
|
||||
+{
|
||||
+ p->locked = false;
|
||||
+ mutex_unlock(&p->mtx);
|
||||
+}
|
||||
+
|
||||
+static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
|
||||
+{
|
||||
+ p->counters = alloc_percpu(unsigned);
|
||||
+ if (unlikely(!p->counters))
|
||||
+ return -ENOMEM;
|
||||
+ p->locked = false;
|
||||
+ mutex_init(&p->mtx);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
|
||||
+{
|
||||
+ free_percpu(p->counters);
|
||||
+ p->counters = NULL; /* catch use after free bugs */
|
||||
+}
|
||||
+
|
||||
+#endif
|
|
@ -0,0 +1,74 @@
|
|||
Lock splice_read and splice_write functions
|
||||
|
||||
commit 1a25b1c4ce189e3926f2981f3302352a930086db
|
||||
Author: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Date: Mon Oct 15 17:20:17 2012 -0400
|
||||
|
||||
Lock splice_read and splice_write functions
|
||||
|
||||
Functions generic_file_splice_read and generic_file_splice_write access
|
||||
the pagecache directly. For block devices these functions must be locked
|
||||
so that block size is not changed while they are in progress.
|
||||
|
||||
This patch is an additional fix for commit b87570f5d349 ("Fix a crash
|
||||
when block device is read and block size is changed at the same time")
|
||||
that locked aio_read, aio_write and mmap against block size change.
|
||||
|
||||
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
|
||||
Index: linux-3.6.x86_64/fs/block_dev.c
|
||||
===================================================================
|
||||
--- linux-3.6.x86_64.orig/fs/block_dev.c 2012-11-16 17:12:57.352936580 -0500
|
||||
+++ linux-3.6.x86_64/fs/block_dev.c 2012-11-16 17:13:11.908887989 -0500
|
||||
@@ -1662,6 +1662,39 @@
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos,
|
||||
+ struct pipe_inode_info *pipe, size_t len,
|
||||
+ unsigned int flags)
|
||||
+{
|
||||
+ ssize_t ret;
|
||||
+ struct block_device *bdev = I_BDEV(file->f_mapping->host);
|
||||
+
|
||||
+ percpu_down_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ ret = generic_file_splice_read(file, ppos, pipe, len, flags);
|
||||
+
|
||||
+ percpu_up_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe,
|
||||
+ struct file *file, loff_t *ppos, size_t len,
|
||||
+ unsigned int flags)
|
||||
+{
|
||||
+ ssize_t ret;
|
||||
+ struct block_device *bdev = I_BDEV(file->f_mapping->host);
|
||||
+
|
||||
+ percpu_down_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ ret = generic_file_splice_write(pipe, file, ppos, len, flags);
|
||||
+
|
||||
+ percpu_up_read(&bdev->bd_block_size_semaphore);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
/*
|
||||
* Try to release a page associated with block device when the system
|
||||
* is under memory pressure.
|
||||
@@ -1700,8 +1733,8 @@
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = compat_blkdev_ioctl,
|
||||
#endif
|
||||
- .splice_read = generic_file_splice_read,
|
||||
- .splice_write = generic_file_splice_write,
|
||||
+ .splice_read = blkdev_splice_read,
|
||||
+ .splice_write = blkdev_splice_write,
|
||||
};
|
||||
|
||||
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
|
15
kernel.spec
15
kernel.spec
|
@ -54,7 +54,7 @@ Summary: The Linux kernel
|
|||
# For non-released -rc kernels, this will be appended after the rcX and
|
||||
# gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
|
||||
#
|
||||
%global baserelease 1
|
||||
%global baserelease 2
|
||||
%global fedora_build %{baserelease}
|
||||
|
||||
# base_sublevel is the kernel version we're starting with and patching
|
||||
|
@ -716,6 +716,11 @@ Patch22114: iwlwifi-remove-queue-empty-warn-3.6.patch
|
|||
#rhbz 870562
|
||||
Patch22115: keyspan.patch
|
||||
|
||||
#rhbz 812129
|
||||
Patch22120: block-fix-a-crash-when-block-device-is.patch
|
||||
Patch22121: blockdev-turn-a-rw-semaphore-into-a-percpu-rw-sem.patch
|
||||
Patch22122: fs-lock-splice_read-and-splice_write-functions.patch
|
||||
|
||||
# END OF PATCH DEFINITIONS
|
||||
|
||||
%endif
|
||||
|
@ -1349,6 +1354,11 @@ ApplyPatch iwlwifi-remove-queue-empty-warn-3.6.patch
|
|||
#rhbz 870562
|
||||
ApplyPatch keyspan.patch
|
||||
|
||||
#rhbz 812129
|
||||
ApplyPatch block-fix-a-crash-when-block-device-is.patch
|
||||
ApplyPatch blockdev-turn-a-rw-semaphore-into-a-percpu-rw-sem.patch
|
||||
ApplyPatch fs-lock-splice_read-and-splice_write-functions.patch
|
||||
|
||||
# END OF PATCH APPLICATIONS
|
||||
|
||||
%endif
|
||||
|
@ -2049,6 +2059,9 @@ fi
|
|||
# and build.
|
||||
|
||||
%changelog
|
||||
* Mon Nov 19 2012 Josh Boyer <jwboyer@redhat.com>
|
||||
- Apply patches from Jeff Moyer to fix direct-io oops (rhbz 812129)
|
||||
|
||||
* Sat Nov 17 2012 Justin M. Forbes <jforbes@linuxtx.org> - 3.6.7-1
|
||||
- linux 3.6.7
|
||||
|
||||
|
|
Loading…
Reference in New Issue