Linux 2.6.35.5

Drop merged patches:
01-compat-make-compat_alloc_user_space-incorporate-the-access_ok-check.patch
02-compat-test-rax-for-the-system-call-number-not-eax.patch
03-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch
direct-io-move-aio_complete-into-end_io.patch
ext4-move-aio-completion-after-unwritten-extent-conversion.patch
xfs-move-aio-completion-after-unwritten-extent-conversion.patch
alsa-seq-oss-fix-double-free-at-error-path-of-snd_seq_oss_open.patch
This commit is contained in:
Chuck Ebbert 2010-09-20 21:04:51 -04:00
parent 42589b1be2
commit 7a68cb0b55
9 changed files with 14 additions and 794 deletions

View File

@ -1,189 +0,0 @@
From c41d68a513c71e35a14f66d71782d27a79a81ea6 Mon Sep 17 00:00:00 2001
From: H. Peter Anvin <hpa@linux.intel.com>
Date: Tue, 7 Sep 2010 16:16:18 -0700
Subject: [PATCH] compat: Make compat_alloc_user_space() incorporate the access_ok()
compat_alloc_user_space() expects the caller to independently call
access_ok() to verify the returned area. A missing call could
introduce problems on some architectures.
This patch incorporates the access_ok() check into
compat_alloc_user_space() and also adds a sanity check on the length.
The existing compat_alloc_user_space() implementations are renamed
arch_compat_alloc_user_space() and are used as part of the
implementation of the new global function.
This patch assumes NULL will cause __get_user()/__put_user() to either
fail or access userspace on all architectures. This should be
followed by checking the return value of compat_access_user_space()
for NULL in the callers, at which time the access_ok() in the callers
can also be removed.
Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: James Bottomley <jejb@parisc-linux.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: <stable@kernel.org>
---
arch/ia64/include/asm/compat.h | 2 +-
arch/mips/include/asm/compat.h | 2 +-
arch/parisc/include/asm/compat.h | 2 +-
arch/powerpc/include/asm/compat.h | 2 +-
arch/s390/include/asm/compat.h | 2 +-
arch/sparc/include/asm/compat.h | 2 +-
arch/x86/include/asm/compat.h | 2 +-
include/linux/compat.h | 3 +++
kernel/compat.c | 21 +++++++++++++++++++++
10 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/arch/ia64/include/asm/compat.h b/arch/ia64/include/asm/compat.h
index f90edc8..9301a28 100644
--- a/arch/ia64/include/asm/compat.h
+++ b/arch/ia64/include/asm/compat.h
@@ -199,7 +199,7 @@ ptr_to_compat(void __user *uptr)
}
static __inline__ void __user *
-compat_alloc_user_space (long len)
+arch_compat_alloc_user_space (long len)
{
struct pt_regs *regs = task_pt_regs(current);
return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len);
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 613f691..dbc5106 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -145,7 +145,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = (struct pt_regs *)
((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1;
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index 02b77ba..efa0b60 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -147,7 +147,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-static __inline__ void __user *compat_alloc_user_space(long len)
+static __inline__ void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = &current->thread.regs;
return (void __user *)regs->gr[30];
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 396d21a..a11d4ea 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -134,7 +134,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = current->thread.regs;
unsigned long usp = regs->gpr[1];
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 104f200..a875c2f 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -181,7 +181,7 @@ static inline int is_compat_task(void)
#endif
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
unsigned long stack;
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 5016f76..6f57325 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -167,7 +167,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = current_thread_info()->kregs;
unsigned long usp = regs->u_regs[UREG_I6];
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 306160e..1d9cd27 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = task_pt_regs(current);
return (void __user *)regs->sp - len;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 9ddc878..5778b55 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -360,5 +360,8 @@ extern ssize_t compat_rw_copy_check_uvector(int type,
const struct compat_iovec __user *uvector, unsigned long nr_segs,
unsigned long fast_segs, struct iovec *fast_pointer,
struct iovec **ret_pointer);
+
+extern void __user *compat_alloc_user_space(unsigned long len);
+
#endif /* CONFIG_COMPAT */
#endif /* _LINUX_COMPAT_H */
diff --git a/kernel/compat.c b/kernel/compat.c
index e167efc..c9e2ec0 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1126,3 +1126,24 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info)
return 0;
}
+
+/*
+ * Allocate user-space memory for the duration of a single system call,
+ * in order to marshall parameters inside a compat thunk.
+ */
+void __user *compat_alloc_user_space(unsigned long len)
+{
+ void __user *ptr;
+
+ /* If len would occupy more than half of the entire compat space... */
+ if (unlikely(len > (((compat_uptr_t)~0) >> 1)))
+ return NULL;
+
+ ptr = arch_compat_alloc_user_space(len);
+
+ if (unlikely(!access_ok(VERIFY_WRITE, ptr, len)))
+ return NULL;
+
+ return ptr;
+}
+EXPORT_SYMBOL_GPL(compat_alloc_user_space);
--
1.7.2.3

View File

@ -1,97 +0,0 @@
From 36d001c70d8a0144ac1d038f6876c484849a74de Mon Sep 17 00:00:00 2001
From: H. Peter Anvin <hpa@linux.intel.com>
Date: Tue, 14 Sep 2010 12:42:41 -0700
Subject: [PATCH] x86-64, compat: Test %rax for the syscall number, not %eax
On 64 bits, we always, by necessity, jump through the system call
table via %rax. For 32-bit system calls, in theory the system call
number is stored in %eax, and the code was testing %eax for a valid
system call number. At one point we loaded the stored value back from
the stack to enforce zero-extension, but that was removed in checkin
d4d67150165df8bf1cc05e532f6efca96f907cab. An actual 32-bit process
will not be able to introduce a non-zero-extended number, but it can
happen via ptrace.
Instead of re-introducing the zero-extension, test what we are
actually going to use, i.e. %rax. This only adds a handful of REX
prefixes to the code.
Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@kernel.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
arch/x86/ia32/ia32entry.S | 14 +++++++-------
1 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b86feab..84e3a4e 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -153,7 +153,7 @@ ENTRY(ia32_sysenter_target)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
sysenter_do_call:
IA32_ARG_FIXUP
@@ -195,7 +195,7 @@ sysexit_from_sys_call:
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
call audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
@@ -248,7 +248,7 @@ sysenter_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
@@ -314,7 +314,7 @@ ENTRY(ia32_cstar_target)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz cstar_tracesys
- cmpl $IA32_NR_syscalls-1,%eax
+ cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
IA32_ARG_FIXUP 1
@@ -367,7 +367,7 @@ cstar_tracesys:
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
xchgl %ebp,%r9d
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
@@ -425,7 +425,7 @@ ENTRY(ia32_syscall)
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
@@ -444,7 +444,7 @@ ia32_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)
--
1.7.2.3

View File

@ -1,49 +0,0 @@
From eefdca043e8391dcd719711716492063030b55ac Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 14 Sep 2010 12:22:58 -0700
Subject: [PATCH] x86-64, compat: Retruncate rax after ia32 syscall entry tracing
In commit d4d6715, we reopened an old hole for a 64-bit ptracer touching a
32-bit tracee in system call entry. A %rax value set via ptrace at the
entry tracing stop gets used whole as a 32-bit syscall number, while we
only check the low 32 bits for validity.
Fix it by truncating %rax back to 32 bits after syscall_trace_enter,
in addition to testing the full 64 bits as has already been added.
Reported-by: Ben Hawkes <hawkes@sota.gen.nz>
Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
arch/x86/ia32/ia32entry.S | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 84e3a4e..518bb99 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -50,7 +50,12 @@
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %eax because syscall_trace_enter() returned
- * the value it wants us to use in the table lookup.
+ * the %rax value we should see. Instead, we just truncate that
+ * value to 32 bits again as we did on entry from user mode.
+ * If it's a new value set by user_regset during entry tracing,
+ * this matches the normal truncation of the user-mode value.
+ * If it's -1 to make us punt the syscall, then (u32)-1 is still
+ * an appropriately invalid value.
*/
.macro LOAD_ARGS32 offset, _r9=0
.if \_r9
@@ -60,6 +65,7 @@
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
+ movl %eax,%eax /* zero extension */
.endm
.macro CFI_STARTPROC32 simple
--
1.7.2.3

View File

@ -1,53 +0,0 @@
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 6 Sep 2010 07:13:45 +0000 (+0200)
Subject: ALSA: seq/oss - Fix double-free at error path of snd_seq_oss_open()
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=27f7ad53829f79e799a253285318bff79ece15bd
ALSA: seq/oss - Fix double-free at error path of snd_seq_oss_open()
The error handling in snd_seq_oss_open() has several bad codes that
do dereferecing released pointers and double-free of kmalloc'ed data.
The object dp is release in free_devinfo() that is called via
private_free callback. The rest shouldn't touch this object any more.
The patch changes delete_port() to call kfree() in any case, and gets
rid of unnecessary calls of destructors in snd_seq_oss_open().
Fixes CVE-2010-3080.
Reported-and-tested-by: Tavis Ormandy <taviso@cmpxchg8b.com>
Cc: <stable@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
index 6857122..69cd7b3 100644
--- a/sound/core/seq/oss/seq_oss_init.c
+++ b/sound/core/seq/oss/seq_oss_init.c
@@ -281,13 +281,10 @@ snd_seq_oss_open(struct file *file, int level)
return 0;
_error:
- snd_seq_oss_writeq_delete(dp->writeq);
- snd_seq_oss_readq_delete(dp->readq);
snd_seq_oss_synth_cleanup(dp);
snd_seq_oss_midi_cleanup(dp);
- delete_port(dp);
delete_seq_queue(dp->queue);
- kfree(dp);
+ delete_port(dp);
return rc;
}
@@ -350,8 +347,10 @@ create_port(struct seq_oss_devinfo *dp)
static int
delete_port(struct seq_oss_devinfo *dp)
{
- if (dp->port < 0)
+ if (dp->port < 0) {
+ kfree(dp);
return 0;
+ }
debug_printk(("delete_port %i\n", dp->port));
return snd_seq_event_port_detach(dp->cseq, dp->port);

View File

@ -1,204 +0,0 @@
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 18 Jul 2010 21:17:09 +0000 (+0000)
Subject: direct-io: move aio_complete into ->end_io
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=40e2e97316af6e62affab7a392e792494b8d9dde
direct-io: move aio_complete into ->end_io
Filesystems with unwritten extent support must not complete an AIO request
until the transaction to convert the extent has been commited. That means
the aio_complete calls needs to be moved into the ->end_io callback so
that the filesystem can control when to call it exactly.
This makes a bit of a mess out of dio_complete and the ->end_io callback
prototype even more complicated.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aac..a10cb91 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
{
ssize_t transferred = 0;
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
transferred = dio->i_size - offset;
}
- if (dio->end_io && dio->result)
- dio->end_io(dio->iocb, offset, transferred,
- dio->map_bh.b_private);
-
- if (dio->flags & DIO_LOCKING)
- /* lockdep: non-owner release */
- up_read_non_owner(&dio->inode->i_alloc_sem);
-
if (ret == 0)
ret = dio->page_errors;
if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
if (ret == 0)
ret = transferred;
+ if (dio->end_io && dio->result) {
+ dio->end_io(dio->iocb, offset, transferred,
+ dio->map_bh.b_private, ret, is_async);
+ } else if (is_async) {
+ aio_complete(dio->iocb, ret, 0);
+ }
+
+ if (dio->flags & DIO_LOCKING)
+ /* lockdep: non-owner release */
+ up_read_non_owner(&dio->inode->i_alloc_sem);
+
return ret;
}
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
- aio_complete(dio->iocb, ret, 0);
+ dio_complete(dio, dio->iocb->ki_pos, 0, true);
kfree(dio);
}
}
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (ret2 == 0) {
- ret = dio_complete(dio, offset, ret);
+ ret = dio_complete(dio, offset, ret, false);
kfree(dio);
} else
BUG_ON(ret != -EIOCBQUEUED);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d6..0afc8c1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
}
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
- ssize_t size, void *private)
+ ssize_t size, void *private, int ret,
+ bool is_async)
{
ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq;
@@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
/* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end || !size)
- return;
+ goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p"
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end);
iocb->private = NULL;
- return;
+ goto out;
}
io_end->offset = offset;
@@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
iocb->private = NULL;
+out:
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 356e976..96337a4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -578,7 +578,9 @@ bail:
static void ocfs2_dio_end_io(struct kiocb *iocb,
loff_t offset,
ssize_t bytes,
- void *private)
+ void *private,
+ int ret,
+ bool is_async)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
int level;
@@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
if (!level)
up_read(&inode->i_alloc_sem);
ocfs2_rw_unlock(inode, level);
+
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
/*
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 8abbf05..95d1e26 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1406,7 +1406,9 @@ xfs_end_io_direct(
struct kiocb *iocb,
loff_t offset,
ssize_t size,
- void *private)
+ void *private,
+ int ret,
+ bool is_async)
{
xfs_ioend_t *ioend = iocb->private;
@@ -1452,6 +1454,9 @@ xfs_end_io_direct(
* against double-freeing.
*/
iocb->private = NULL;
+
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 319da17..c5057fb 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */
+ struct kiocb *io_iocb;
+ int io_result;
} xfs_ioend_t;
extern const struct address_space_operations xfs_address_space_operations;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 68ca1b0..f91affb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -415,7 +415,8 @@ struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
- ssize_t bytes, void *private);
+ ssize_t bytes, void *private, int ret,
+ bool is_async);
/*
* Attribute flags. These should be or-ed together to figure out what

View File

@ -1,95 +0,0 @@
From: jiayingz@google.com (Jiaying Zhang) <>
Date: Tue, 27 Jul 2010 15:56:06 +0000 (-0400)
Subject: ext4: move aio completion after unwritten extent conversion
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftytso%2Fext4.git;a=commitdiff_plain;h=5b3ff237bef43b9e7fb7d1eb858e29b73fd664f9
ext4: move aio completion after unwritten extent conversion
This patch is to be applied upon Christoph's "direct-io: move aio_complete
into ->end_io" patch. It adds iocb and result fields to struct ext4_io_end_t,
so that we can call aio_complete from ext4_end_io_nolock() after the extent
conversion has finished.
I have verified with Christoph's aio-dio test that used to fail after a few
runs on an original kernel but now succeeds on the patched kernel.
See http://thread.gmane.org/gmane.comp.file-systems.ext4/19659 for details.
Signed-off-by: Jiaying Zhang <jiayingz@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4c7d472..fbb3947 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -170,13 +170,15 @@ struct mpage_da_data {
};
#define EXT4_IO_UNWRITTEN 0x1
typedef struct ext4_io_end {
- struct list_head list; /* per-file finished AIO list */
+ struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
struct page *page; /* page struct for buffer write */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */
+ struct kiocb *iocb; /* iocb struct for AIO */
+ int result; /* error value for AIO */
} ext4_io_end_t;
/*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 609159e..46d2079 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3668,6 +3668,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
return ret;
}
+ if (io->iocb)
+ aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */
io->flag = 0;
return ret;
@@ -3767,6 +3769,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
io->offset = 0;
io->size = 0;
io->page = NULL;
+ io->iocb = NULL;
+ io->result = 0;
INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list);
}
@@ -3796,12 +3800,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end);
iocb->private = NULL;
- goto out;
+out:
+ if (is_async)
+ aio_complete(iocb, ret, 0);
+ return;
}
io_end->offset = offset;
io_end->size = size;
- io_end->flag = EXT4_IO_UNWRITTEN;
+ if (is_async) {
+ io_end->iocb = iocb;
+ io_end->result = ret;
+ }
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
@@ -3813,9 +3823,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
iocb->private = NULL;
-out:
- if (is_async)
- aio_complete(iocb, ret, 0);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)

View File

@ -48,7 +48,7 @@ Summary: The Linux kernel
# reset this by hand to 1 (or to 0 and then use rpmdev-bumpspec).
# scripts/rebase.sh should be made to do that for you, actually.
#
%global baserelease 28
%global baserelease 29
%global fedora_build %{baserelease}
# base_sublevel is the kernel version we're starting with and patching
@ -60,7 +60,7 @@ Summary: The Linux kernel
%if 0%{?released_kernel}
# Do we have a -stable update to apply?
%define stable_update 4
%define stable_update 5
# Is it a -stable RC?
%define stable_rc 0
# Set rpm version accordingly
@ -606,10 +606,6 @@ Patch30: git-utrace.patch
Patch31: utrace-ptrace-fix-build.patch
Patch32: utrace-remove-use-of-kref_set.patch
Patch101: 01-compat-make-compat_alloc_user_space-incorporate-the-access_ok-check.patch
Patch102: 02-compat-test-rax-for-the-system-call-number-not-eax.patch
Patch103: 03-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch
Patch150: linux-2.6.29-sparc-IOC_TYPECHECK.patch
Patch160: linux-2.6-32bit-mmap-exec-randomization.patch
@ -724,11 +720,6 @@ Patch12020: hid-support-tivo-slide-remote.patch
Patch12040: only-use-alpha2-regulatory-information-from-country-IE.patch
# rhbz #617699
Patch12050: direct-io-move-aio_complete-into-end_io.patch
Patch12060: ext4-move-aio-completion-after-unwritten-extent-conversion.patch
Patch12070: xfs-move-aio-completion-after-unwritten-extent-conversion.patch
Patch12080: kprobes-x86-fix-kprobes-to-skip-prefixes-correctly.patch
# rhbz #622149
@ -745,8 +736,6 @@ Patch12520: execve-improve-interactivity-with-large-arguments.patch
Patch12521: execve-make-responsive-to-sigkill-with-large-arguments.patch
Patch12522: setup_arg_pages-diagnose-excessive-argument-size.patch
# CVE-2010-3080
Patch12530: alsa-seq-oss-fix-double-free-at-error-path-of-snd_seq_oss_open.patch
# CVE-2010-2954
Patch12540: irda-correctly-clean-up-self-ias_obj-on-irda_bind-failure.patch
# CVE-2010-2960
@ -1180,9 +1169,6 @@ ApplyPatch utrace-remove-use-of-kref_set.patch
# Architecture patches
# x86(-64)
ApplyPatch 01-compat-make-compat_alloc_user_space-incorporate-the-access_ok-check.patch
ApplyPatch 02-compat-test-rax-for-the-system-call-number-not-eax.patch
ApplyPatch 03-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch
#
# Intel IOMMU
@ -1361,11 +1347,6 @@ ApplyPatch neuter_intel_microcode_load.patch
ApplyPatch only-use-alpha2-regulatory-information-from-country-IE.patch
# rhbz #617699
ApplyPatch direct-io-move-aio_complete-into-end_io.patch
ApplyPatch ext4-move-aio-completion-after-unwritten-extent-conversion.patch
ApplyPatch xfs-move-aio-completion-after-unwritten-extent-conversion.patch
# bz 610941
ApplyPatch kprobes-x86-fix-kprobes-to-skip-prefixes-correctly.patch
@ -1384,8 +1365,6 @@ ApplyPatch execve-improve-interactivity-with-large-arguments.patch
ApplyPatch execve-make-responsive-to-sigkill-with-large-arguments.patch
ApplyPatch setup_arg_pages-diagnose-excessive-argument-size.patch
# CVE-2010-3080
ApplyPatch alsa-seq-oss-fix-double-free-at-error-path-of-snd_seq_oss_open.patch
# CVE-2010-2954
ApplyPatch irda-correctly-clean-up-self-ias_obj-on-irda_bind-failure.patch
# CVE-2010-2960
@ -1978,6 +1957,17 @@ fi
# and build.
%changelog
* Mon Sep 20 2010 Chuck Ebbert <cebbert@redhat.com> 2.6.35.5-29
- Linux 2.6.35.5
- Drop merged patches:
01-compat-make-compat_alloc_user_space-incorporate-the-access_ok-check.patch
02-compat-test-rax-for-the-system-call-number-not-eax.patch
03-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch
direct-io-move-aio_complete-into-end_io.patch
ext4-move-aio-completion-after-unwritten-extent-conversion.patch
xfs-move-aio-completion-after-unwritten-extent-conversion.patch
alsa-seq-oss-fix-double-free-at-error-path-of-snd_seq_oss_open.patch
* Thu Sep 16 2010 Dennis Gilmore <dennis@ausil.us>
- build sparc headers on sparcv9
- disable some modules to enable the kernel to build on sparc

View File

@ -1,2 +1,2 @@
091abeb4684ce03d1d936851618687b6 linux-2.6.35.tar.bz2
738f762746488345b1a8707d00895eef patch-2.6.35.4.bz2
5bf1900e4ea72b8c65f4f7aae0a28d14 patch-2.6.35.5.bz2

View File

@ -1,83 +0,0 @@
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 18 Jul 2010 21:17:10 +0000 (+0000)
Subject: xfs: move aio completion after unwritten extent conversion
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=fb511f2150174b18b28ad54708c1adda0df39b17
xfs: move aio completion after unwritten extent conversion
If we write into an unwritten extent using AIO we need to complete the AIO
request after the extent conversion has finished. Without that a read could
race to see see the extent still unwritten and return zeros. For synchronous
I/O we already take care of that by flushing the xfsconvertd workqueue (which
might be a bit of overkill).
To do that add iocb and result fields to struct xfs_ioend, so that we can
call aio_complete from xfs_end_io after the extent conversion has happened.
Note that we need a new result field as io_error is used for positive errno
values, while the AIO code can return negative error values and positive
transfer sizes.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 95d1e26..13622d5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -265,8 +265,11 @@ xfs_end_io(
xfs_finish_ioend(ioend, 0);
/* ensure we don't spin on blocked ioends */
delay(1);
- } else
+ } else {
+ if (ioend->io_iocb)
+ aio_complete(ioend->io_iocb, ioend->io_result, 0);
xfs_destroy_ioend(ioend);
+ }
}
/*
@@ -299,6 +302,8 @@ xfs_alloc_ioend(
atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
+ ioend->io_iocb = NULL;
+ ioend->io_result = 0;
INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend;
@@ -1411,6 +1416,7 @@ xfs_end_io_direct(
bool is_async)
{
xfs_ioend_t *ioend = iocb->private;
+ bool complete_aio = is_async;
/*
* Non-NULL private data means we need to issue a transaction to
@@ -1436,7 +1442,14 @@ xfs_end_io_direct(
if (ioend->io_type == IO_READ) {
xfs_finish_ioend(ioend, 0);
} else if (private && size > 0) {
- xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
+ if (is_async) {
+ ioend->io_iocb = iocb;
+ ioend->io_result = ret;
+ complete_aio = false;
+ xfs_finish_ioend(ioend, 0);
+ } else {
+ xfs_finish_ioend(ioend, 1);
+ }
} else {
/*
* A direct I/O write ioend starts it's life in unwritten
@@ -1455,7 +1468,7 @@ xfs_end_io_direct(
*/
iocb->private = NULL;
- if (is_async)
+ if (complete_aio)
aio_complete(iocb, ret, 0);
}