Linux v4.3.3
This commit is contained in:
parent
be9160e268
commit
e7ca3b90d2
|
@ -1,288 +0,0 @@
|
||||||
From 0305cd5f7fca85dae392b9ba85b116896eb7c1c7 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Filipe Manana <fdmanana@suse.com>
|
|
||||||
Date: Fri, 16 Oct 2015 12:34:25 +0100
|
|
||||||
Subject: [PATCH] Btrfs: fix truncation of compressed and inlined extents
|
|
||||||
|
|
||||||
When truncating a file to a smaller size which consists of an inline
|
|
||||||
extent that is compressed, we did not discard (or made unusable) the
|
|
||||||
data between the new file size and the old file size, wasting metadata
|
|
||||||
space and allowing for the truncated data to be leaked and the data
|
|
||||||
corruption/loss mentioned below.
|
|
||||||
We were also not correctly decrementing the number of bytes used by the
|
|
||||||
inode, we were setting it to zero, giving a wrong report for callers of
|
|
||||||
the stat(2) syscall. The fsck tool also reported an error about a mismatch
|
|
||||||
between the nbytes of the file versus the real space used by the file.
|
|
||||||
|
|
||||||
Now because we weren't discarding the truncated region of the file, it
|
|
||||||
was possible for a caller of the clone ioctl to actually read the data
|
|
||||||
that was truncated, allowing for a security breach without requiring root
|
|
||||||
access to the system, using only standard filesystem operations. The
|
|
||||||
scenario is the following:
|
|
||||||
|
|
||||||
1) User A creates a file which consists of an inline and compressed
|
|
||||||
extent with a size of 2000 bytes - the file is not accessible to
|
|
||||||
any other users (no read, write or execution permission for anyone
|
|
||||||
else);
|
|
||||||
|
|
||||||
2) The user truncates the file to a size of 1000 bytes;
|
|
||||||
|
|
||||||
3) User A makes the file world readable;
|
|
||||||
|
|
||||||
4) User B creates a file consisting of an inline extent of 2000 bytes;
|
|
||||||
|
|
||||||
5) User B issues a clone operation from user A's file into its own
|
|
||||||
file (using a length argument of 0, clone the whole range);
|
|
||||||
|
|
||||||
6) User B now gets to see the 1000 bytes that user A truncated from
|
|
||||||
its file before it made its file world readable. User B also lost
|
|
||||||
the bytes in the range [1000, 2000[ bytes from its own file, but
|
|
||||||
that might be ok if his/her intention was reading stale data from
|
|
||||||
user A that was never supposed to be public.
|
|
||||||
|
|
||||||
Note that this contrasts with the case where we truncate a file from 2000
|
|
||||||
bytes to 1000 bytes and then truncate it back from 1000 to 2000 bytes. In
|
|
||||||
this case reading any byte from the range [1000, 2000[ will return a value
|
|
||||||
of 0x00, instead of the original data.
|
|
||||||
|
|
||||||
This problem exists since the clone ioctl was added and happens both with
|
|
||||||
and without my recent data loss and file corruption fixes for the clone
|
|
||||||
ioctl (patch "Btrfs: fix file corruption and data loss after cloning
|
|
||||||
inline extents").
|
|
||||||
|
|
||||||
So fix this by truncating the compressed inline extents as we do for the
|
|
||||||
non-compressed case, which involves decompressing, if the data isn't already
|
|
||||||
in the page cache, compressing the truncated version of the extent, writing
|
|
||||||
the compressed content into the inline extent and then truncate it.
|
|
||||||
|
|
||||||
The following test case for fstests reproduces the problem. In order for
|
|
||||||
the test to pass both this fix and my previous fix for the clone ioctl
|
|
||||||
that forbids cloning a smaller inline extent into a larger one,
|
|
||||||
which is titled "Btrfs: fix file corruption and data loss after cloning
|
|
||||||
inline extents", are needed. Without that other fix the test fails in a
|
|
||||||
different way that does not leak the truncated data, instead part of
|
|
||||||
destination file gets replaced with zeroes (because the destination file
|
|
||||||
has a larger inline extent than the source).
|
|
||||||
|
|
||||||
seq=`basename $0`
|
|
||||||
seqres=$RESULT_DIR/$seq
|
|
||||||
echo "QA output created by $seq"
|
|
||||||
tmp=/tmp/$$
|
|
||||||
status=1 # failure is the default!
|
|
||||||
trap "_cleanup; exit \$status" 0 1 2 3 15
|
|
||||||
|
|
||||||
_cleanup()
|
|
||||||
{
|
|
||||||
rm -f $tmp.*
|
|
||||||
}
|
|
||||||
|
|
||||||
# get standard environment, filters and checks
|
|
||||||
. ./common/rc
|
|
||||||
. ./common/filter
|
|
||||||
|
|
||||||
# real QA test starts here
|
|
||||||
_need_to_be_root
|
|
||||||
_supported_fs btrfs
|
|
||||||
_supported_os Linux
|
|
||||||
_require_scratch
|
|
||||||
_require_cloner
|
|
||||||
|
|
||||||
rm -f $seqres.full
|
|
||||||
|
|
||||||
_scratch_mkfs >>$seqres.full 2>&1
|
|
||||||
_scratch_mount "-o compress"
|
|
||||||
|
|
||||||
# Create our test files. File foo is going to be the source of a clone operation
|
|
||||||
# and consists of a single inline extent with an uncompressed size of 512 bytes,
|
|
||||||
# while file bar consists of a single inline extent with an uncompressed size of
|
|
||||||
# 256 bytes. For our test's purpose, it's important that file bar has an inline
|
|
||||||
# extent with a size smaller than foo's inline extent.
|
|
||||||
$XFS_IO_PROG -f -c "pwrite -S 0xa1 0 128" \
|
|
||||||
-c "pwrite -S 0x2a 128 384" \
|
|
||||||
$SCRATCH_MNT/foo | _filter_xfs_io
|
|
||||||
$XFS_IO_PROG -f -c "pwrite -S 0xbb 0 256" $SCRATCH_MNT/bar | _filter_xfs_io
|
|
||||||
|
|
||||||
# Now durably persist all metadata and data. We do this to make sure that we get
|
|
||||||
# on disk an inline extent with a size of 512 bytes for file foo.
|
|
||||||
sync
|
|
||||||
|
|
||||||
# Now truncate our file foo to a smaller size. Because it consists of a
|
|
||||||
# compressed and inline extent, btrfs did not shrink the inline extent to the
|
|
||||||
# new size (if the extent was not compressed, btrfs would shrink it to 128
|
|
||||||
# bytes), it only updates the inode's i_size to 128 bytes.
|
|
||||||
$XFS_IO_PROG -c "truncate 128" $SCRATCH_MNT/foo
|
|
||||||
|
|
||||||
# Now clone foo's inline extent into bar.
|
|
||||||
# This clone operation should fail with errno EOPNOTSUPP because the source
|
|
||||||
# file consists only of an inline extent and the file's size is smaller than
|
|
||||||
# the inline extent of the destination (128 bytes < 256 bytes). However the
|
|
||||||
# clone ioctl was not prepared to deal with a file that has a size smaller
|
|
||||||
# than the size of its inline extent (something that happens only for compressed
|
|
||||||
# inline extents), resulting in copying the full inline extent from the source
|
|
||||||
# file into the destination file.
|
|
||||||
#
|
|
||||||
# Note that btrfs' clone operation for inline extents consists of removing the
|
|
||||||
# inline extent from the destination inode and copy the inline extent from the
|
|
||||||
# source inode into the destination inode, meaning that if the destination
|
|
||||||
# inode's inline extent is larger (N bytes) than the source inode's inline
|
|
||||||
# extent (M bytes), some bytes (N - M bytes) will be lost from the destination
|
|
||||||
# file. Btrfs could copy the source inline extent's data into the destination's
|
|
||||||
# inline extent so that we would not lose any data, but that's currently not
|
|
||||||
# done due to the complexity that would be needed to deal with such cases
|
|
||||||
# (especially when one or both extents are compressed), returning EOPNOTSUPP, as
|
|
||||||
# it's normally not a very common case to clone very small files (only case
|
|
||||||
# where we get inline extents) and copying inline extents does not save any
|
|
||||||
# space (unlike for normal, non-inlined extents).
|
|
||||||
$CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar
|
|
||||||
|
|
||||||
# Now because the above clone operation used to succeed, and due to foo's inline
|
|
||||||
# extent not being shrunk by the truncate operation, our file bar got the whole
|
|
||||||
# inline extent copied from foo, making us lose the last 128 bytes from bar
|
|
||||||
# which got replaced by the bytes in range [128, 256[ from foo before foo was
|
|
||||||
# truncated - in other words, data loss from bar and being able to read old and
|
|
||||||
# stale data from foo that should not be possible to read anymore through normal
|
|
||||||
# filesystem operations. Contrast with the case where we truncate a file from a
|
|
||||||
# size N to a smaller size M, truncate it back to size N and then read the range
|
|
||||||
# [M, N[, we should always get the value 0x00 for all the bytes in that range.
|
|
||||||
|
|
||||||
# We expected the clone operation to fail with errno EOPNOTSUPP and therefore
|
|
||||||
# not modify our file's bar data/metadata. So its content should be 256 bytes
|
|
||||||
# long with all bytes having the value 0xbb.
|
|
||||||
#
|
|
||||||
# Without the btrfs bug fix, the clone operation succeeded and resulted in
|
|
||||||
# leaking truncated data from foo, the bytes that belonged to its range
|
|
||||||
# [128, 256[, and losing data from bar in that same range. So reading the
|
|
||||||
# file gave us the following content:
|
|
||||||
#
|
|
||||||
# 0000000 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1
|
|
||||||
# *
|
|
||||||
# 0000200 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a
|
|
||||||
# *
|
|
||||||
# 0000400
|
|
||||||
echo "File bar's content after the clone operation:"
|
|
||||||
od -t x1 $SCRATCH_MNT/bar
|
|
||||||
|
|
||||||
# Also because the foo's inline extent was not shrunk by the truncate
|
|
||||||
# operation, btrfs' fsck, which is run by the fstests framework every time a
|
|
||||||
# test completes, failed reporting the following error:
|
|
||||||
#
|
|
||||||
# root 5 inode 257 errors 400, nbytes wrong
|
|
||||||
|
|
||||||
status=0
|
|
||||||
exit
|
|
||||||
|
|
||||||
Cc: stable@vger.kernel.org
|
|
||||||
Signed-off-by: Filipe Manana <fdmanana@suse.com>
|
|
||||||
---
|
|
||||||
fs/btrfs/inode.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++----------
|
|
||||||
1 file changed, 68 insertions(+), 14 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
|
|
||||||
index 208db4e835f0..cbb4286490a1 100644
|
|
||||||
--- a/fs/btrfs/inode.c
|
|
||||||
+++ b/fs/btrfs/inode.c
|
|
||||||
@@ -4217,6 +4217,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
+static int truncate_inline_extent(struct inode *inode,
|
|
||||||
+ struct btrfs_path *path,
|
|
||||||
+ struct btrfs_key *found_key,
|
|
||||||
+ const u64 item_end,
|
|
||||||
+ const u64 new_size)
|
|
||||||
+{
|
|
||||||
+ struct extent_buffer *leaf = path->nodes[0];
|
|
||||||
+ int slot = path->slots[0];
|
|
||||||
+ struct btrfs_file_extent_item *fi;
|
|
||||||
+ u32 size = (u32)(new_size - found_key->offset);
|
|
||||||
+ struct btrfs_root *root = BTRFS_I(inode)->root;
|
|
||||||
+
|
|
||||||
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
|
||||||
+
|
|
||||||
+ if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
|
|
||||||
+ loff_t offset = new_size;
|
|
||||||
+ loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
|
|
||||||
+
|
|
||||||
+ /*
|
|
||||||
+ * Zero out the remaining of the last page of our inline extent,
|
|
||||||
+ * instead of directly truncating our inline extent here - that
|
|
||||||
+ * would be much more complex (decompressing all the data, then
|
|
||||||
+ * compressing the truncated data, which might be bigger than
|
|
||||||
+ * the size of the inline extent, resize the extent, etc).
|
|
||||||
+ * We release the path because to get the page we might need to
|
|
||||||
+ * read the extent item from disk (data not in the page cache).
|
|
||||||
+ */
|
|
||||||
+ btrfs_release_path(path);
|
|
||||||
+ return btrfs_truncate_page(inode, offset, page_end - offset, 0);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ btrfs_set_file_extent_ram_bytes(leaf, fi, size);
|
|
||||||
+ size = btrfs_file_extent_calc_inline_size(size);
|
|
||||||
+ btrfs_truncate_item(root, path, size, 1);
|
|
||||||
+
|
|
||||||
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
|
|
||||||
+ inode_sub_bytes(inode, item_end + 1 - new_size);
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* this can truncate away extent items, csum items and directory items.
|
|
||||||
* It starts at a high offset and removes keys until it can't find
|
|
||||||
@@ -4411,27 +4452,40 @@ search_again:
|
|
||||||
* special encodings
|
|
||||||
*/
|
|
||||||
if (!del_item &&
|
|
||||||
- btrfs_file_extent_compression(leaf, fi) == 0 &&
|
|
||||||
btrfs_file_extent_encryption(leaf, fi) == 0 &&
|
|
||||||
btrfs_file_extent_other_encoding(leaf, fi) == 0) {
|
|
||||||
- u32 size = new_size - found_key.offset;
|
|
||||||
-
|
|
||||||
- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
|
|
||||||
- inode_sub_bytes(inode, item_end + 1 -
|
|
||||||
- new_size);
|
|
||||||
|
|
||||||
/*
|
|
||||||
- * update the ram bytes to properly reflect
|
|
||||||
- * the new size of our item
|
|
||||||
+ * Need to release path in order to truncate a
|
|
||||||
+ * compressed extent. So delete any accumulated
|
|
||||||
+ * extent items so far.
|
|
||||||
*/
|
|
||||||
- btrfs_set_file_extent_ram_bytes(leaf, fi, size);
|
|
||||||
- size =
|
|
||||||
- btrfs_file_extent_calc_inline_size(size);
|
|
||||||
- btrfs_truncate_item(root, path, size, 1);
|
|
||||||
+ if (btrfs_file_extent_compression(leaf, fi) !=
|
|
||||||
+ BTRFS_COMPRESS_NONE && pending_del_nr) {
|
|
||||||
+ err = btrfs_del_items(trans, root, path,
|
|
||||||
+ pending_del_slot,
|
|
||||||
+ pending_del_nr);
|
|
||||||
+ if (err) {
|
|
||||||
+ btrfs_abort_transaction(trans,
|
|
||||||
+ root,
|
|
||||||
+ err);
|
|
||||||
+ goto error;
|
|
||||||
+ }
|
|
||||||
+ pending_del_nr = 0;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ err = truncate_inline_extent(inode, path,
|
|
||||||
+ &found_key,
|
|
||||||
+ item_end,
|
|
||||||
+ new_size);
|
|
||||||
+ if (err) {
|
|
||||||
+ btrfs_abort_transaction(trans,
|
|
||||||
+ root, err);
|
|
||||||
+ goto error;
|
|
||||||
+ }
|
|
||||||
} else if (test_bit(BTRFS_ROOT_REF_COWS,
|
|
||||||
&root->state)) {
|
|
||||||
- inode_sub_bytes(inode, item_end + 1 -
|
|
||||||
- found_key.offset);
|
|
||||||
+ inode_sub_bytes(inode, item_end + 1 - new_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
delete:
|
|
||||||
--
|
|
||||||
2.5.0
|
|
||||||
|
|
|
@ -1,77 +0,0 @@
|
||||||
From 8e92c2b0cb50a31e2956760498bc8cdb72993fb3 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Quentin Casasnovas <quentin.casasnovas@oracle.com>
|
|
||||||
Date: Fri, 16 Oct 2015 17:11:42 +0200
|
|
||||||
Subject: [PATCH] RDS: fix race condition when sending a message on unbound
|
|
||||||
socket.
|
|
||||||
|
|
||||||
Sasha's found a NULL pointer dereference in the RDS connection code when
|
|
||||||
sending a message to an apparently unbound socket. The problem is caused
|
|
||||||
by the code checking if the socket is bound in rds_sendmsg(), which checks
|
|
||||||
the rs_bound_addr field without taking a lock on the socket. This opens a
|
|
||||||
race where rs_bound_addr is temporarily set but where the transport is not
|
|
||||||
in rds_bind(), leading to a NULL pointer dereference when trying to
|
|
||||||
dereference 'trans' in __rds_conn_create().
|
|
||||||
|
|
||||||
Vegard wrote a reproducer for this issue, so kindly ask him to share if
|
|
||||||
you're interested.
|
|
||||||
|
|
||||||
I cannot reproduce the NULL pointer dereference using Vegard's reproducer
|
|
||||||
with this patch, whereas I could without.
|
|
||||||
|
|
||||||
Complete earlier incomplete fix to CVE-2015-6937:
|
|
||||||
|
|
||||||
74e98eb08588 ("RDS: verify the underlying transport exists before creating a connection")
|
|
||||||
|
|
||||||
Signed-off-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
|
|
||||||
Reviewed-by: Vegard Nossum <vegard.nossum@oracle.com>
|
|
||||||
Reviewed-by: Sasha Levin <sasha.levin@oracle.com>
|
|
||||||
Cc: Vegard Nossum <vegard.nossum@oracle.com>
|
|
||||||
Cc: Sasha Levin <sasha.levin@oracle.com>
|
|
||||||
Cc: Chien Yen <chien.yen@oracle.com>
|
|
||||||
Cc: Santosh Shilimkar <santosh.shilimkar@oracle.com>
|
|
||||||
Cc: David S. Miller <davem@davemloft.net>
|
|
||||||
Cc: stable@vger.kernel.org
|
|
||||||
---
|
|
||||||
net/rds/connection.c | 6 ------
|
|
||||||
net/rds/send.c | 4 +++-
|
|
||||||
2 files changed, 3 insertions(+), 7 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/net/rds/connection.c b/net/rds/connection.c
|
|
||||||
index 49adeef8090c..9b2de5e67d79 100644
|
|
||||||
--- a/net/rds/connection.c
|
|
||||||
+++ b/net/rds/connection.c
|
|
||||||
@@ -190,12 +190,6 @@ new_conn:
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
- if (trans == NULL) {
|
|
||||||
- kmem_cache_free(rds_conn_slab, conn);
|
|
||||||
- conn = ERR_PTR(-ENODEV);
|
|
||||||
- goto out;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
conn->c_trans = trans;
|
|
||||||
|
|
||||||
ret = trans->conn_alloc(conn, gfp);
|
|
||||||
diff --git a/net/rds/send.c b/net/rds/send.c
|
|
||||||
index 4df61a515b83..859de6f32521 100644
|
|
||||||
--- a/net/rds/send.c
|
|
||||||
+++ b/net/rds/send.c
|
|
||||||
@@ -1009,11 +1009,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
|
||||||
release_sock(sk);
|
|
||||||
}
|
|
||||||
|
|
||||||
- /* racing with another thread binding seems ok here */
|
|
||||||
+ lock_sock(sk);
|
|
||||||
if (daddr == 0 || rs->rs_bound_addr == 0) {
|
|
||||||
+ release_sock(sk);
|
|
||||||
ret = -ENOTCONN; /* XXX not a great errno */
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
+ release_sock(sk);
|
|
||||||
|
|
||||||
if (payload_len > rds_sk_sndbuf(rs)) {
|
|
||||||
ret = -EMSGSIZE;
|
|
||||||
--
|
|
||||||
2.4.3
|
|
||||||
|
|
14
kernel.spec
14
kernel.spec
|
@ -52,7 +52,7 @@ Summary: The Linux kernel
|
||||||
%if 0%{?released_kernel}
|
%if 0%{?released_kernel}
|
||||||
|
|
||||||
# Do we have a -stable update to apply?
|
# Do we have a -stable update to apply?
|
||||||
%define stable_update 2
|
%define stable_update 3
|
||||||
# Set rpm version accordingly
|
# Set rpm version accordingly
|
||||||
%if 0%{?stable_update}
|
%if 0%{?stable_update}
|
||||||
%define stablerev %{stable_update}
|
%define stablerev %{stable_update}
|
||||||
|
@ -592,9 +592,6 @@ Patch503: drm-i915-turn-off-wc-mmaps.patch
|
||||||
|
|
||||||
Patch508: kexec-uefi-copy-secure_boot-flag-in-boot-params.patch
|
Patch508: kexec-uefi-copy-secure_boot-flag-in-boot-params.patch
|
||||||
|
|
||||||
#CVE-2015-7990 rhbz 1276437 1276438
|
|
||||||
Patch524: RDS-fix-race-condition-when-sending-a-message-on-unb.patch
|
|
||||||
|
|
||||||
#CVE-2015-7799 rhbz 1271134 1271135
|
#CVE-2015-7799 rhbz 1271134 1271135
|
||||||
Patch512: isdn_ppp-Add-checks-for-allocation-failure-in-isdn_p.patch
|
Patch512: isdn_ppp-Add-checks-for-allocation-failure-in-isdn_p.patch
|
||||||
Patch513: ppp-slip-Validate-VJ-compression-slot-parameters-com.patch
|
Patch513: ppp-slip-Validate-VJ-compression-slot-parameters-com.patch
|
||||||
|
@ -613,9 +610,6 @@ Patch556: netfilter-ipset-Fix-extension-alignment.patch
|
||||||
Patch557: netfilter-ipset-Fix-hash-type-expiration.patch
|
Patch557: netfilter-ipset-Fix-hash-type-expiration.patch
|
||||||
Patch558: netfilter-ipset-Fix-hash-type-expire-release-empty-h.patch
|
Patch558: netfilter-ipset-Fix-hash-type-expire-release-empty-h.patch
|
||||||
|
|
||||||
#CVE-2015-8374 rhbz 1286261 1286262
|
|
||||||
Patch565: Btrfs-fix-truncation-of-compressed-and-inlined-exten.patch
|
|
||||||
|
|
||||||
#rhbz 1284059
|
#rhbz 1284059
|
||||||
Patch566: KEYS-Fix-handling-of-stored-error-in-a-negatively-in.patch
|
Patch566: KEYS-Fix-handling-of-stored-error-in-a-negatively-in.patch
|
||||||
|
|
||||||
|
@ -634,9 +628,6 @@ Patch571: ideapad-laptop-Add-Lenovo-ideapad-Y700-17ISK-to-no_h.patch
|
||||||
#rhbz 1288687
|
#rhbz 1288687
|
||||||
Patch572: alua_fix.patch
|
Patch572: alua_fix.patch
|
||||||
|
|
||||||
#CVE-2013-7446 rhbz 1282688 1282712
|
|
||||||
Patch573: unix-avoid-use-after-free-in-ep_remove_wait_queue.patch
|
|
||||||
|
|
||||||
#CVE-XXXX-XXXX rhbz 1291329 1291332
|
#CVE-XXXX-XXXX rhbz 1291329 1291332
|
||||||
Patch574: ovl-fix-permission-checking-for-setattr.patch
|
Patch574: ovl-fix-permission-checking-for-setattr.patch
|
||||||
|
|
||||||
|
@ -2086,6 +2077,9 @@ fi
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
%changelog
|
%changelog
|
||||||
|
* Tue Dec 15 2015 Josh Boyer <jwboyer@fedoraproject.org>
|
||||||
|
- Linux v4.3.3
|
||||||
|
|
||||||
* Mon Dec 14 2015 Josh Boyer <jwboyer@fedoraproject.org>
|
* Mon Dec 14 2015 Josh Boyer <jwboyer@fedoraproject.org>
|
||||||
- CVE-2015-7550 Race between read and revoke keys (rhbz 1291197 1291198)
|
- CVE-2015-7550 Race between read and revoke keys (rhbz 1291197 1291198)
|
||||||
- CVE-XXXX-XXXX permission bypass on overlayfs (rhbz 1291329 1291332)
|
- CVE-XXXX-XXXX permission bypass on overlayfs (rhbz 1291329 1291332)
|
||||||
|
|
2
sources
2
sources
|
@ -1,3 +1,3 @@
|
||||||
58b35794eee3b6d52ce7be39357801e7 linux-4.3.tar.xz
|
58b35794eee3b6d52ce7be39357801e7 linux-4.3.tar.xz
|
||||||
7c516c9528b9f9aac0136944b0200b7e perf-man-4.3.tar.gz
|
7c516c9528b9f9aac0136944b0200b7e perf-man-4.3.tar.gz
|
||||||
3a465c7cf55ec9dbf2d72d9292aa5fde patch-4.3.2.xz
|
d3235b3640ae6ac1ab579171943fda4b patch-4.3.3.xz
|
||||||
|
|
|
@ -1,333 +0,0 @@
|
||||||
From a46b9d2bac864f3ef6b21eb96864ddd88794222d Mon Sep 17 00:00:00 2001
|
|
||||||
From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
|
|
||||||
Date: Fri, 20 Nov 2015 22:07:23 +0000
|
|
||||||
Subject: [PATCH 05/43] unix: avoid use-after-free in ep_remove_wait_queue
|
|
||||||
|
|
||||||
[ Upstream commit 7d267278a9ece963d77eefec61630223fce08c6c ]
|
|
||||||
|
|
||||||
Rainer Weikusat <rweikusat@mobileactivedefense.com> writes:
|
|
||||||
An AF_UNIX datagram socket being the client in an n:1 association with
|
|
||||||
some server socket is only allowed to send messages to the server if the
|
|
||||||
receive queue of this socket contains at most sk_max_ack_backlog
|
|
||||||
datagrams. This implies that prospective writers might be forced to go
|
|
||||||
to sleep despite none of the messages presently enqueued on the server
|
|
||||||
receive queue were sent by them. In order to ensure that these will be
|
|
||||||
woken up once space becomes again available, the present unix_dgram_poll
|
|
||||||
routine does a second sock_poll_wait call with the peer_wait wait queue
|
|
||||||
of the server socket as queue argument (unix_dgram_recvmsg does a wake
|
|
||||||
up on this queue after a datagram was received). This is inherently
|
|
||||||
problematic because the server socket is only guaranteed to remain alive
|
|
||||||
for as long as the client still holds a reference to it. In case the
|
|
||||||
connection is dissolved via connect or by the dead peer detection logic
|
|
||||||
in unix_dgram_sendmsg, the server socket may be freed despite "the
|
|
||||||
polling mechanism" (in particular, epoll) still has a pointer to the
|
|
||||||
corresponding peer_wait queue. There's no way to forcibly deregister a
|
|
||||||
wait queue with epoll.
|
|
||||||
|
|
||||||
Based on an idea by Jason Baron, the patch below changes the code such
|
|
||||||
that a wait_queue_t belonging to the client socket is enqueued on the
|
|
||||||
peer_wait queue of the server whenever the peer receive queue full
|
|
||||||
condition is detected by either a sendmsg or a poll. A wake up on the
|
|
||||||
peer queue is then relayed to the ordinary wait queue of the client
|
|
||||||
socket via wake function. The connection to the peer wait queue is again
|
|
||||||
dissolved if either a wake up is about to be relayed or the client
|
|
||||||
socket reconnects or a dead peer is detected or the client socket is
|
|
||||||
itself closed. This enables removing the second sock_poll_wait from
|
|
||||||
unix_dgram_poll, thus avoiding the use-after-free, while still ensuring
|
|
||||||
that no blocked writer sleeps forever.
|
|
||||||
|
|
||||||
Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
|
|
||||||
Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets")
|
|
||||||
Reviewed-by: Jason Baron <jbaron@akamai.com>
|
|
||||||
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
||||||
---
|
|
||||||
include/net/af_unix.h | 1 +
|
|
||||||
net/unix/af_unix.c | 183 ++++++++++++++++++++++++++++++++++++++++++++------
|
|
||||||
2 files changed, 165 insertions(+), 19 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
|
|
||||||
index b36d837..2a91a05 100644
|
|
||||||
--- a/include/net/af_unix.h
|
|
||||||
+++ b/include/net/af_unix.h
|
|
||||||
@@ -62,6 +62,7 @@ struct unix_sock {
|
|
||||||
#define UNIX_GC_CANDIDATE 0
|
|
||||||
#define UNIX_GC_MAYBE_CYCLE 1
|
|
||||||
struct socket_wq peer_wq;
|
|
||||||
+ wait_queue_t peer_wake;
|
|
||||||
};
|
|
||||||
|
|
||||||
static inline struct unix_sock *unix_sk(const struct sock *sk)
|
|
||||||
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
|
|
||||||
index 42ab2cc..153b2f2 100644
|
|
||||||
--- a/net/unix/af_unix.c
|
|
||||||
+++ b/net/unix/af_unix.c
|
|
||||||
@@ -326,6 +326,118 @@ found:
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Support code for asymmetrically connected dgram sockets
|
|
||||||
+ *
|
|
||||||
+ * If a datagram socket is connected to a socket not itself connected
|
|
||||||
+ * to the first socket (eg, /dev/log), clients may only enqueue more
|
|
||||||
+ * messages if the present receive queue of the server socket is not
|
|
||||||
+ * "too large". This means there's a second writeability condition
|
|
||||||
+ * poll and sendmsg need to test. The dgram recv code will do a wake
|
|
||||||
+ * up on the peer_wait wait queue of a socket upon reception of a
|
|
||||||
+ * datagram which needs to be propagated to sleeping would-be writers
|
|
||||||
+ * since these might not have sent anything so far. This can't be
|
|
||||||
+ * accomplished via poll_wait because the lifetime of the server
|
|
||||||
+ * socket might be less than that of its clients if these break their
|
|
||||||
+ * association with it or if the server socket is closed while clients
|
|
||||||
+ * are still connected to it and there's no way to inform "a polling
|
|
||||||
+ * implementation" that it should let go of a certain wait queue
|
|
||||||
+ *
|
|
||||||
+ * In order to propagate a wake up, a wait_queue_t of the client
|
|
||||||
+ * socket is enqueued on the peer_wait queue of the server socket
|
|
||||||
+ * whose wake function does a wake_up on the ordinary client socket
|
|
||||||
+ * wait queue. This connection is established whenever a write (or
|
|
||||||
+ * poll for write) hit the flow control condition and broken when the
|
|
||||||
+ * association to the server socket is dissolved or after a wake up
|
|
||||||
+ * was relayed.
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
|
|
||||||
+ void *key)
|
|
||||||
+{
|
|
||||||
+ struct unix_sock *u;
|
|
||||||
+ wait_queue_head_t *u_sleep;
|
|
||||||
+
|
|
||||||
+ u = container_of(q, struct unix_sock, peer_wake);
|
|
||||||
+
|
|
||||||
+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
|
|
||||||
+ q);
|
|
||||||
+ u->peer_wake.private = NULL;
|
|
||||||
+
|
|
||||||
+ /* relaying can only happen while the wq still exists */
|
|
||||||
+ u_sleep = sk_sleep(&u->sk);
|
|
||||||
+ if (u_sleep)
|
|
||||||
+ wake_up_interruptible_poll(u_sleep, key);
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
|
|
||||||
+{
|
|
||||||
+ struct unix_sock *u, *u_other;
|
|
||||||
+ int rc;
|
|
||||||
+
|
|
||||||
+ u = unix_sk(sk);
|
|
||||||
+ u_other = unix_sk(other);
|
|
||||||
+ rc = 0;
|
|
||||||
+ spin_lock(&u_other->peer_wait.lock);
|
|
||||||
+
|
|
||||||
+ if (!u->peer_wake.private) {
|
|
||||||
+ u->peer_wake.private = other;
|
|
||||||
+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
|
|
||||||
+
|
|
||||||
+ rc = 1;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ spin_unlock(&u_other->peer_wait.lock);
|
|
||||||
+ return rc;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
|
|
||||||
+ struct sock *other)
|
|
||||||
+{
|
|
||||||
+ struct unix_sock *u, *u_other;
|
|
||||||
+
|
|
||||||
+ u = unix_sk(sk);
|
|
||||||
+ u_other = unix_sk(other);
|
|
||||||
+ spin_lock(&u_other->peer_wait.lock);
|
|
||||||
+
|
|
||||||
+ if (u->peer_wake.private == other) {
|
|
||||||
+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
|
|
||||||
+ u->peer_wake.private = NULL;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ spin_unlock(&u_other->peer_wait.lock);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
|
|
||||||
+ struct sock *other)
|
|
||||||
+{
|
|
||||||
+ unix_dgram_peer_wake_disconnect(sk, other);
|
|
||||||
+ wake_up_interruptible_poll(sk_sleep(sk),
|
|
||||||
+ POLLOUT |
|
|
||||||
+ POLLWRNORM |
|
|
||||||
+ POLLWRBAND);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* preconditions:
|
|
||||||
+ * - unix_peer(sk) == other
|
|
||||||
+ * - association is stable
|
|
||||||
+ */
|
|
||||||
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
|
|
||||||
+{
|
|
||||||
+ int connected;
|
|
||||||
+
|
|
||||||
+ connected = unix_dgram_peer_wake_connect(sk, other);
|
|
||||||
+
|
|
||||||
+ if (unix_recvq_full(other))
|
|
||||||
+ return 1;
|
|
||||||
+
|
|
||||||
+ if (connected)
|
|
||||||
+ unix_dgram_peer_wake_disconnect(sk, other);
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static inline int unix_writable(struct sock *sk)
|
|
||||||
{
|
|
||||||
return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
|
|
||||||
@@ -430,6 +542,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
|
|
||||||
skpair->sk_state_change(skpair);
|
|
||||||
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ unix_dgram_peer_wake_disconnect(sk, skpair);
|
|
||||||
sock_put(skpair); /* It may now die */
|
|
||||||
unix_peer(sk) = NULL;
|
|
||||||
}
|
|
||||||
@@ -665,6 +779,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
|
|
||||||
INIT_LIST_HEAD(&u->link);
|
|
||||||
mutex_init(&u->readlock); /* single task reading lock */
|
|
||||||
init_waitqueue_head(&u->peer_wait);
|
|
||||||
+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
|
|
||||||
unix_insert_socket(unix_sockets_unbound(sk), sk);
|
|
||||||
out:
|
|
||||||
if (sk == NULL)
|
|
||||||
@@ -1032,6 +1147,8 @@ restart:
|
|
||||||
if (unix_peer(sk)) {
|
|
||||||
struct sock *old_peer = unix_peer(sk);
|
|
||||||
unix_peer(sk) = other;
|
|
||||||
+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
|
|
||||||
+
|
|
||||||
unix_state_double_unlock(sk, other);
|
|
||||||
|
|
||||||
if (other != old_peer)
|
|
||||||
@@ -1471,6 +1588,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
|
|
||||||
struct scm_cookie scm;
|
|
||||||
int max_level;
|
|
||||||
int data_len = 0;
|
|
||||||
+ int sk_locked;
|
|
||||||
|
|
||||||
wait_for_unix_gc();
|
|
||||||
err = scm_send(sock, msg, &scm, false);
|
|
||||||
@@ -1549,12 +1667,14 @@ restart:
|
|
||||||
goto out_free;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ sk_locked = 0;
|
|
||||||
unix_state_lock(other);
|
|
||||||
+restart_locked:
|
|
||||||
err = -EPERM;
|
|
||||||
if (!unix_may_send(sk, other))
|
|
||||||
goto out_unlock;
|
|
||||||
|
|
||||||
- if (sock_flag(other, SOCK_DEAD)) {
|
|
||||||
+ if (unlikely(sock_flag(other, SOCK_DEAD))) {
|
|
||||||
/*
|
|
||||||
* Check with 1003.1g - what should
|
|
||||||
* datagram error
|
|
||||||
@@ -1562,10 +1682,14 @@ restart:
|
|
||||||
unix_state_unlock(other);
|
|
||||||
sock_put(other);
|
|
||||||
|
|
||||||
+ if (!sk_locked)
|
|
||||||
+ unix_state_lock(sk);
|
|
||||||
+
|
|
||||||
err = 0;
|
|
||||||
- unix_state_lock(sk);
|
|
||||||
if (unix_peer(sk) == other) {
|
|
||||||
unix_peer(sk) = NULL;
|
|
||||||
+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
|
|
||||||
+
|
|
||||||
unix_state_unlock(sk);
|
|
||||||
|
|
||||||
unix_dgram_disconnected(sk, other);
|
|
||||||
@@ -1591,21 +1715,38 @@ restart:
|
|
||||||
goto out_unlock;
|
|
||||||
}
|
|
||||||
|
|
||||||
- if (unix_peer(other) != sk && unix_recvq_full(other)) {
|
|
||||||
- if (!timeo) {
|
|
||||||
- err = -EAGAIN;
|
|
||||||
- goto out_unlock;
|
|
||||||
+ if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
|
|
||||||
+ if (timeo) {
|
|
||||||
+ timeo = unix_wait_for_peer(other, timeo);
|
|
||||||
+
|
|
||||||
+ err = sock_intr_errno(timeo);
|
|
||||||
+ if (signal_pending(current))
|
|
||||||
+ goto out_free;
|
|
||||||
+
|
|
||||||
+ goto restart;
|
|
||||||
}
|
|
||||||
|
|
||||||
- timeo = unix_wait_for_peer(other, timeo);
|
|
||||||
+ if (!sk_locked) {
|
|
||||||
+ unix_state_unlock(other);
|
|
||||||
+ unix_state_double_lock(sk, other);
|
|
||||||
+ }
|
|
||||||
|
|
||||||
- err = sock_intr_errno(timeo);
|
|
||||||
- if (signal_pending(current))
|
|
||||||
- goto out_free;
|
|
||||||
+ if (unix_peer(sk) != other ||
|
|
||||||
+ unix_dgram_peer_wake_me(sk, other)) {
|
|
||||||
+ err = -EAGAIN;
|
|
||||||
+ sk_locked = 1;
|
|
||||||
+ goto out_unlock;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
- goto restart;
|
|
||||||
+ if (!sk_locked) {
|
|
||||||
+ sk_locked = 1;
|
|
||||||
+ goto restart_locked;
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
+ if (unlikely(sk_locked))
|
|
||||||
+ unix_state_unlock(sk);
|
|
||||||
+
|
|
||||||
if (sock_flag(other, SOCK_RCVTSTAMP))
|
|
||||||
__net_timestamp(skb);
|
|
||||||
maybe_add_creds(skb, sock, other);
|
|
||||||
@@ -1619,6 +1760,8 @@ restart:
|
|
||||||
return len;
|
|
||||||
|
|
||||||
out_unlock:
|
|
||||||
+ if (sk_locked)
|
|
||||||
+ unix_state_unlock(sk);
|
|
||||||
unix_state_unlock(other);
|
|
||||||
out_free:
|
|
||||||
kfree_skb(skb);
|
|
||||||
@@ -2475,14 +2618,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
|
|
||||||
return mask;
|
|
||||||
|
|
||||||
writable = unix_writable(sk);
|
|
||||||
- other = unix_peer_get(sk);
|
|
||||||
- if (other) {
|
|
||||||
- if (unix_peer(other) != sk) {
|
|
||||||
- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
|
|
||||||
- if (unix_recvq_full(other))
|
|
||||||
- writable = 0;
|
|
||||||
- }
|
|
||||||
- sock_put(other);
|
|
||||||
+ if (writable) {
|
|
||||||
+ unix_state_lock(sk);
|
|
||||||
+
|
|
||||||
+ other = unix_peer(sk);
|
|
||||||
+ if (other && unix_peer(other) != sk &&
|
|
||||||
+ unix_recvq_full(other) &&
|
|
||||||
+ unix_dgram_peer_wake_me(sk, other))
|
|
||||||
+ writable = 0;
|
|
||||||
+
|
|
||||||
+ unix_state_unlock(sk);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (writable)
|
|
||||||
--
|
|
||||||
2.1.0
|
|
||||||
|
|
Loading…
Reference in New Issue