Compare commits
10 Commits
master
...
rawhide/us
Author | SHA1 | Date |
---|---|---|
Jarod Wilson | ff12eec93b | |
Jarod Wilson | ee55cc9d82 | |
Chuck Ebbert | 89ddee85e3 | |
Chuck Ebbert | c62fd951a3 | |
Kyle McMartin | 5d6f3748c3 | |
Kyle McMartin | b1794f76d9 | |
Kyle McMartin | 34a06da355 | |
Kyle McMartin | aab93ae971 | |
Kyle McMartin | 7ad304ff1f | |
Kyle McMartin | 49bf2616cc |
|
@ -3,5 +3,3 @@ patch-*.bz2
|
|||
clog
|
||||
*.rpm
|
||||
kernel-2.6.*/
|
||||
/patch-2.6.36.1.bz2
|
||||
/patch-2.6.36.2-rc1.bz2
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
From 1299757412533c8e2ea1d00a1e77944bc87ff39d Mon Sep 17 00:00:00 2001
|
||||
From: Josef Bacik <josef@redhat.com>
|
||||
Date: Fri, 22 Oct 2010 15:26:53 -0400
|
||||
Subject: [PATCH 1/3] Btrfs: fix error handling in btrfs_get_sb
|
||||
|
||||
If we failed to find the root subvol id, or the subvol=<name>, we would
|
||||
deactivate the locked super and close the devices. The problem is at this point
|
||||
we have gotten the SB all setup, which includes setting super_operations, so
|
||||
when we'd deactivate the super, we'd do a close_ctree() which closes the
|
||||
devices, so we'd end up closing the devices twice. So if you do something like
|
||||
this
|
||||
|
||||
mount /dev/sda1 /mnt/test1
|
||||
mount /dev/sda1 /mnt/test2 -o subvol=xxx
|
||||
umount /mnt/test1
|
||||
|
||||
it would blow up (if subvol xxx doesn't exist). This patch fixes that problem.
|
||||
Thanks,
|
||||
|
||||
Signed-off-by: Josef Bacik <josef@redhat.com>
|
||||
---
|
||||
fs/btrfs/super.c | 7 +++----
|
||||
1 files changed, 3 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
|
||||
index 1776dbd..185702f 100644
|
||||
--- a/fs/btrfs/super.c
|
||||
+++ b/fs/btrfs/super.c
|
||||
@@ -629,7 +629,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
|
||||
if (IS_ERR(root)) {
|
||||
error = PTR_ERR(root);
|
||||
deactivate_locked_super(s);
|
||||
- goto error;
|
||||
+ goto error_free_subvol_name;
|
||||
}
|
||||
/* if they gave us a subvolume name bind mount into that */
|
||||
if (strcmp(subvol_name, ".")) {
|
||||
@@ -643,14 +643,14 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
|
||||
deactivate_locked_super(s);
|
||||
error = PTR_ERR(new_root);
|
||||
dput(root);
|
||||
- goto error_close_devices;
|
||||
+ goto error_free_subvol_name;
|
||||
}
|
||||
if (!new_root->d_inode) {
|
||||
dput(root);
|
||||
dput(new_root);
|
||||
deactivate_locked_super(s);
|
||||
error = -ENXIO;
|
||||
- goto error_close_devices;
|
||||
+ goto error_free_subvol_name;
|
||||
}
|
||||
dput(root);
|
||||
root = new_root;
|
||||
@@ -668,7 +668,6 @@ error_close_devices:
|
||||
btrfs_close_devices(fs_devices);
|
||||
error_free_subvol_name:
|
||||
kfree(subvol_name);
|
||||
-error:
|
||||
return error;
|
||||
}
|
||||
|
||||
--
|
||||
1.7.3.3
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
From 34825fb2c891ee56fdde84000f0b1bc83a12f42f Mon Sep 17 00:00:00 2001
|
||||
From: Ian Kent <raven@themaw.net>
|
||||
Date: Mon, 22 Nov 2010 02:21:38 +0000
|
||||
Subject: [PATCH 2/3] Btrfs - fix race between btrfs_get_sb() and umount
|
||||
|
||||
When mounting a btrfs file system btrfs_test_super() may attempt to
|
||||
use sb->s_fs_info, the btrfs root, of a super block that is going away
|
||||
and that has had the btrfs root set to NULL in its ->put_super(). But
|
||||
if the super block is going away it cannot be an existing super block
|
||||
so we can return false in this case.
|
||||
|
||||
Signed-off-by: Ian Kent <raven@themaw.net>
|
||||
Signed-off-by: Chris Mason <chris.mason@oracle.com>
|
||||
---
|
||||
fs/btrfs/super.c | 6 ++++++
|
||||
1 files changed, 6 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
|
||||
index 185702f..b169698 100644
|
||||
--- a/fs/btrfs/super.c
|
||||
+++ b/fs/btrfs/super.c
|
||||
@@ -551,6 +551,12 @@ static int btrfs_test_super(struct super_block *s, void *data)
|
||||
struct btrfs_fs_devices *test_fs_devices = data;
|
||||
struct btrfs_root *root = btrfs_sb(s);
|
||||
|
||||
+ /*
|
||||
+ * If this super block is going away, return false as it
|
||||
+ * can't match as an existing super block.
|
||||
+ */
|
||||
+ if (!atomic_read(&s->s_active))
|
||||
+ return 0;
|
||||
return root->fs_info->fs_devices == test_fs_devices;
|
||||
}
|
||||
|
||||
--
|
||||
1.7.3.3
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
From 7f9f05e1f4cd9a625bc35b117a239d9b0cbe4a03 Mon Sep 17 00:00:00 2001
|
||||
From: Josef Bacik <josef@redhat.com>
|
||||
Date: Fri, 19 Nov 2010 14:59:15 -0500
|
||||
Subject: [PATCH 3/3] Btrfs: setup blank root and fs_info for mount time
|
||||
|
||||
There is a problem with how we use sget: it searches through the list of supers
|
||||
attached to the fs_type looking for a super with the same fs_devices as what
|
||||
we're trying to mount. This depends on sb->s_fs_info being filled, but we don't
|
||||
fill that in until we get to btrfs_fill_super, so we could hit supers on the
|
||||
fs_type super list that have a null s_fs_info. In order to fix that we need to
|
||||
go ahead and setup a blank root with a blank fs_info to hold fs_devices, that
|
||||
way our test will work out right and then we can set s_fs_info in
|
||||
btrfs_set_super, and then open_ctree will simply use our pre-allocated root and
|
||||
fs_info when setting everything up. Thanks,
|
||||
|
||||
Signed-off-by: Josef Bacik <josef@redhat.com>
|
||||
Signed-off-by: Chris Mason <chris.mason@oracle.com>
|
||||
---
|
||||
fs/btrfs/disk-io.c | 6 ++----
|
||||
fs/btrfs/super.c | 34 +++++++++++++++++++++++++++++++---
|
||||
2 files changed, 33 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
|
||||
index 64f1008..65efa04 100644
|
||||
--- a/fs/btrfs/disk-io.c
|
||||
+++ b/fs/btrfs/disk-io.c
|
||||
@@ -1539,10 +1539,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
|
||||
GFP_NOFS);
|
||||
struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
|
||||
GFP_NOFS);
|
||||
- struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
|
||||
- GFP_NOFS);
|
||||
- struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
|
||||
- GFP_NOFS);
|
||||
+ struct btrfs_root *tree_root = btrfs_sb(sb);
|
||||
+ struct btrfs_fs_info *fs_info = tree_root->fs_info;
|
||||
struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
|
||||
GFP_NOFS);
|
||||
struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
|
||||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
|
||||
index b169698..6479db9 100644
|
||||
--- a/fs/btrfs/super.c
|
||||
+++ b/fs/btrfs/super.c
|
||||
@@ -548,7 +548,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
|
||||
|
||||
static int btrfs_test_super(struct super_block *s, void *data)
|
||||
{
|
||||
- struct btrfs_fs_devices *test_fs_devices = data;
|
||||
+ struct btrfs_root *test_root = data;
|
||||
struct btrfs_root *root = btrfs_sb(s);
|
||||
|
||||
/*
|
||||
@@ -557,9 +557,17 @@ static int btrfs_test_super(struct super_block *s, void *data)
|
||||
*/
|
||||
if (!atomic_read(&s->s_active))
|
||||
return 0;
|
||||
- return root->fs_info->fs_devices == test_fs_devices;
|
||||
+ return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
|
||||
}
|
||||
|
||||
+static int btrfs_set_super(struct super_block *s, void *data)
|
||||
+{
|
||||
+ s->s_fs_info = data;
|
||||
+
|
||||
+ return set_anon_super(s, data);
|
||||
+}
|
||||
+
|
||||
+
|
||||
/*
|
||||
* Find a superblock for the given device / mount point.
|
||||
*
|
||||
@@ -573,6 +581,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
|
||||
struct super_block *s;
|
||||
struct dentry *root;
|
||||
struct btrfs_fs_devices *fs_devices = NULL;
|
||||
+ struct btrfs_root *tree_root = NULL;
|
||||
+ struct btrfs_fs_info *fs_info = NULL;
|
||||
fmode_t mode = FMODE_READ;
|
||||
char *subvol_name = NULL;
|
||||
u64 subvol_objectid = 0;
|
||||
@@ -601,8 +611,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
|
||||
goto error_close_devices;
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Setup a dummy root and fs_info for test/set super. This is because
|
||||
+ * we don't actually fill this stuff out until open_ctree, but we need
|
||||
+ * it for searching for existing supers, so this lets us do that and
|
||||
+ * then open_ctree will properly initialize everything later.
|
||||
+ */
|
||||
+ fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
|
||||
+ tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
|
||||
+ if (!fs_info || !tree_root) {
|
||||
+ error = -ENOMEM;
|
||||
+ goto error_close_devices;
|
||||
+ }
|
||||
+ fs_info->tree_root = tree_root;
|
||||
+ fs_info->fs_devices = fs_devices;
|
||||
+ tree_root->fs_info = fs_info;
|
||||
+
|
||||
bdev = fs_devices->latest_bdev;
|
||||
- s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
|
||||
+ s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
|
||||
if (IS_ERR(s))
|
||||
goto error_s;
|
||||
|
||||
@@ -672,6 +698,8 @@ error_s:
|
||||
error = PTR_ERR(s);
|
||||
error_close_devices:
|
||||
btrfs_close_devices(fs_devices);
|
||||
+ kfree(fs_info);
|
||||
+ kfree(tree_root);
|
||||
error_free_subvol_name:
|
||||
kfree(subvol_name);
|
||||
return error;
|
||||
--
|
||||
1.7.3.3
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
# This file is intentionally left empty in the stock kernel. It's a nicety
|
||||
# added for those wanting to do custom rebuilds with altered config opts.
|
|
@ -0,0 +1,65 @@
|
|||
From linux-fsdevel-owner@vger.kernel.org Thu Nov 18 21:03:11 2010
|
||||
From: Josef Bacik <josef@redhat.com>
|
||||
To: linux-fsdevel@vger.kernel.org, eparis@redhat.com,
|
||||
linux-kernel@vger.kernel.org, sds@tycho.nsa.gov,
|
||||
selinux@tycho.nsa.gov, bfields@fieldses.org
|
||||
Subject: [PATCH] fs: call security_d_instantiate in d_obtain_alias V2
|
||||
Date: Thu, 18 Nov 2010 20:52:55 -0500
|
||||
Message-Id: <1290131575-2489-1-git-send-email-josef@redhat.com>
|
||||
X-Mailing-List: linux-fsdevel@vger.kernel.org
|
||||
|
||||
While trying to track down some NFS problems with BTRFS, I kept noticing I was
|
||||
getting -EACCES for no apparent reason. Eric Paris and printk() helped me
|
||||
figure out that it was SELinux that was giving me grief, with the following
|
||||
denial
|
||||
|
||||
type=AVC msg=audit(1290013638.413:95): avc: denied { 0x800000 } for pid=1772
|
||||
comm="nfsd" name="" dev=sda1 ino=256 scontext=system_u:system_r:kernel_t:s0
|
||||
tcontext=system_u:object_r:unlabeled_t:s0 tclass=file
|
||||
|
||||
Turns out this is because in d_obtain_alias if we can't find an alias we create
|
||||
one and do all the normal instantiation stuff, but we don't do the
|
||||
security_d_instantiate.
|
||||
|
||||
Usually we are protected from getting a hashed dentry that hasn't yet run
|
||||
security_d_instantiate() by the parent's i_mutex, but obviously this isn't an
|
||||
option there, so in order to deal with the case that a second thread comes in
|
||||
and finds our new dentry before we get to run security_d_instantiate(), we go
|
||||
ahead and call it if we find a dentry already. Eric assures me that this is ok
|
||||
as the code checks to see if the dentry has been initialized already so calling
|
||||
security_d_instantiate() against the same dentry multiple times is ok. With
|
||||
this patch I'm no longer getting errant -EACCES values.
|
||||
|
||||
Signed-off-by: Josef Bacik <josef@redhat.com>
|
||||
---
|
||||
V1->V2:
|
||||
-added second security_d_instantiate() call
|
||||
|
||||
fs/dcache.c | 3 +++
|
||||
1 files changed, 3 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/fs/dcache.c b/fs/dcache.c
|
||||
index 23702a9..119d489 100644
|
||||
--- a/fs/dcache.c
|
||||
+++ b/fs/dcache.c
|
||||
@@ -1201,9 +1201,12 @@ struct dentry *d_obtain_alias(struct inode *inode)
|
||||
spin_unlock(&tmp->d_lock);
|
||||
|
||||
spin_unlock(&dcache_lock);
|
||||
+ security_d_instantiate(tmp, inode);
|
||||
return tmp;
|
||||
|
||||
out_iput:
|
||||
+ if (res && !IS_ERR(res))
|
||||
+ security_d_instantiate(res, inode);
|
||||
iput(inode);
|
||||
return res;
|
||||
}
|
||||
--
|
||||
1.6.6.1
|
||||
|
||||
--
|
||||
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
|
||||
the body of a message to majordomo@vger.kernel.org
|
||||
More majordomo info at http://vger.kernel.org/majordomo-info.html
|
||||
|
94
kernel.spec
94
kernel.spec
|
@ -51,7 +51,7 @@ Summary: The Linux kernel
|
|||
# For non-released -rc kernels, this will be prepended with "0.", so
|
||||
# for example a 3 here will become 0.3
|
||||
#
|
||||
%global baserelease 12
|
||||
%global baserelease 14
|
||||
%global fedora_build %{baserelease}
|
||||
|
||||
# base_sublevel is the kernel version we're starting with and patching
|
||||
|
@ -63,9 +63,9 @@ Summary: The Linux kernel
|
|||
%if 0%{?released_kernel}
|
||||
|
||||
# Do we have a -stable update to apply?
|
||||
%define stable_update 2
|
||||
%define stable_update 3
|
||||
# Is it a -stable RC?
|
||||
%define stable_rc 1
|
||||
%define stable_rc 0
|
||||
# Set rpm version accordingly
|
||||
%if 0%{?stable_update}
|
||||
%define stablerev .%{stable_update}
|
||||
|
@ -103,6 +103,8 @@ Summary: The Linux kernel
|
|||
%define with_up %{?_without_up: 0} %{?!_without_up: 1}
|
||||
# kernel-smp (only valid for ppc 32-bit)
|
||||
%define with_smp %{?_without_smp: 0} %{?!_without_smp: 1}
|
||||
# kernel-PAE (only valid for i686)
|
||||
%define with_pae %{?_without_pae: 0} %{?!_without_pae: 1}
|
||||
# kernel-debug
|
||||
%define with_debug %{?_without_debug: 0} %{?!_without_debug: 1}
|
||||
# kernel-doc
|
||||
|
@ -140,6 +142,8 @@ Summary: The Linux kernel
|
|||
%define with_baseonly %{?_with_baseonly: 1} %{?!_with_baseonly: 0}
|
||||
# Only build the smp kernel (--with smponly):
|
||||
%define with_smponly %{?_with_smponly: 1} %{?!_with_smponly: 0}
|
||||
# Only build the pae kernel (--with paeonly):
|
||||
%define with_paeonly %{?_with_paeonly: 1} %{?!_with_paeonly: 0}
|
||||
# Only build the debug kernel (--with dbgonly):
|
||||
%define with_dbgonly %{?_with_dbgonly: 1} %{?!_with_dbgonly: 0}
|
||||
|
||||
|
@ -221,21 +225,28 @@ Summary: The Linux kernel
|
|||
%define debuginfodir /usr/lib/debug
|
||||
|
||||
# kernel-PAE is only built on i686.
|
||||
%ifarch i686
|
||||
%define with_pae 1
|
||||
%else
|
||||
%ifnarch i686
|
||||
%define with_pae 0
|
||||
%endif
|
||||
|
||||
# if requested, only build base kernel
|
||||
%if %{with_baseonly}
|
||||
%define with_smp 0
|
||||
%define with_pae 0
|
||||
%define with_debug 0
|
||||
%endif
|
||||
|
||||
# if requested, only build smp kernel
|
||||
%if %{with_smponly}
|
||||
%define with_up 0
|
||||
%define with_pae 0
|
||||
%define with_debug 0
|
||||
%endif
|
||||
|
||||
# if requested, only build pae kernel
|
||||
%if %{with_paeonly}
|
||||
%define with_up 0
|
||||
%define with_smp 0
|
||||
%define with_debug 0
|
||||
%endif
|
||||
|
||||
|
@ -243,6 +254,7 @@ Summary: The Linux kernel
|
|||
%if %{with_dbgonly}
|
||||
%if %{debugbuildsenabled}
|
||||
%define with_up 0
|
||||
%define with_pae 0
|
||||
%endif
|
||||
%define with_smp 0
|
||||
%define with_pae 0
|
||||
|
@ -541,6 +553,10 @@ Source90: config-sparc64-generic
|
|||
|
||||
Source100: config-arm
|
||||
|
||||
# This file is intentionally left empty in the stock kernel. It's a nicety
|
||||
# added for those wanting to do custom rebuilds with altered config opts.
|
||||
Source1000: config-local
|
||||
|
||||
# Here should be only the patches up to the upstream canonical Linus tree.
|
||||
|
||||
# For a stable release kernel
|
||||
|
@ -616,6 +632,7 @@ Patch300: pnp-log-pnp-resources-as-we-do-for-pci.patch
|
|||
Patch380: linux-2.6-defaults-pci_no_msi.patch
|
||||
Patch381: linux-2.6-defaults-pci_use_crs.patch
|
||||
Patch383: linux-2.6-defaults-aspm.patch
|
||||
Patch384: pci-disable-aspm-if-bios-asks-us-to.patch
|
||||
|
||||
Patch385: ima-allow-it-to-be-completely-disabled-and-default-off.patch
|
||||
|
||||
|
@ -729,8 +746,6 @@ Patch12308: fix-i8k-inline-asm.patch
|
|||
Patch12405: inet_diag-make-sure-we-run-the-same-bytecode-we-audited.patch
|
||||
Patch12408: netlink-make-nlmsg_find_attr-take-a-const-ptr.patch
|
||||
|
||||
Patch12406: posix-cpu-timers-workaround-to-suppress-problems-with-mt-exec.patch
|
||||
|
||||
Patch12410: tty-make-tiocgicount-a-handler.patch
|
||||
Patch12411: tty-icount-changeover-for-other-main-devices.patch
|
||||
|
||||
|
@ -739,7 +754,13 @@ Patch12413: tpm-autodetect-itpm-devices.patch
|
|||
Patch12420: mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch
|
||||
Patch12421: mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds.patch
|
||||
|
||||
Patch12430: sched-cure-more-NO_HZ-load-average-woes.patch
|
||||
Patch12435: btrfs-fix-error-handling-in-btrfs_get_sb.patch
|
||||
Patch12436: btrfs-fix-race-between-btrfs_get_sb-and-umount.patch
|
||||
Patch12437: btrfs-setup-blank-root-and-fs_info-for-mount-time.patch
|
||||
|
||||
Patch12438: fs-call-security_d_instantiate-in-d_obtain_alias.patch
|
||||
|
||||
Patch12439: net-AF_PACKET-vmalloc.patch
|
||||
|
||||
%endif
|
||||
|
||||
|
@ -1140,6 +1161,14 @@ make -f %{SOURCE20} VERSION=%{version} configs
|
|||
done
|
||||
%endif
|
||||
|
||||
# Merge in any user-provided local config option changes
|
||||
for i in %{all_arch_configs}
|
||||
do
|
||||
mv $i $i.tmp
|
||||
./merge.pl %{SOURCE1000} $i.tmp > $i
|
||||
rm $i.tmp
|
||||
done
|
||||
|
||||
ApplyOptionalPatch git-linus.diff
|
||||
|
||||
ApplyPatch linux-2.6-makefile-after_link.patch
|
||||
|
@ -1187,12 +1216,19 @@ ApplyPatch linux-2.6-32bit-mmap-exec-randomization.patch
|
|||
# bugfixes to drivers and filesystems
|
||||
#
|
||||
|
||||
#rhbz#662344
|
||||
ApplyPatch fs-call-security_d_instantiate-in-d_obtain_alias.patch
|
||||
|
||||
# ext4
|
||||
|
||||
# xfs
|
||||
|
||||
# btrfs
|
||||
|
||||
# rhbz#656465
|
||||
ApplyPatch btrfs-fix-error-handling-in-btrfs_get_sb.patch
|
||||
ApplyPatch btrfs-fix-race-between-btrfs_get_sb-and-umount.patch
|
||||
ApplyPatch btrfs-setup-blank-root-and-fs_info-for-mount-time.patch
|
||||
|
||||
# eCryptfs
|
||||
|
||||
|
@ -1226,6 +1262,7 @@ ApplyPatch linux-2.6-defaults-pci_no_msi.patch
|
|||
ApplyPatch linux-2.6-defaults-pci_use_crs.patch
|
||||
# enable ASPM by default on hardware we expect to work
|
||||
ApplyPatch linux-2.6-defaults-aspm.patch
|
||||
ApplyPatch pci-disable-aspm-if-bios-asks-us-to.patch
|
||||
|
||||
# helps debug resource conflicts [c1f3f281]
|
||||
ApplyPatch pnp-log-pnp-resources-as-we-do-for-pci.patch
|
||||
|
@ -1364,9 +1401,6 @@ ApplyPatch fix-i8k-inline-asm.patch
|
|||
ApplyPatch inet_diag-make-sure-we-run-the-same-bytecode-we-audited.patch
|
||||
ApplyPatch netlink-make-nlmsg_find_attr-take-a-const-ptr.patch
|
||||
|
||||
# rhbz#656264
|
||||
ApplyPatch posix-cpu-timers-workaround-to-suppress-problems-with-mt-exec.patch
|
||||
|
||||
# CVE-2010-4077, CVE-2010-4075 (rhbz#648660, #648663)
|
||||
ApplyPatch tty-make-tiocgicount-a-handler.patch
|
||||
ApplyPatch tty-icount-changeover-for-other-main-devices.patch
|
||||
|
@ -1377,8 +1411,8 @@ ApplyPatch tpm-autodetect-itpm-devices.patch
|
|||
ApplyPatch mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch
|
||||
ApplyPatch mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds.patch
|
||||
|
||||
# rhbz#650934
|
||||
ApplyPatch sched-cure-more-NO_HZ-load-average-woes.patch
|
||||
# rhbz#637619
|
||||
ApplyPatch net-AF_PACKET-vmalloc.patch
|
||||
|
||||
# END OF PATCH APPLICATIONS
|
||||
|
||||
|
@ -1644,7 +1678,7 @@ BuildKernel() {
|
|||
rm -f modinfo modnames
|
||||
|
||||
# remove files that will be auto generated by depmod at rpm -i time
|
||||
for i in alias alias.bin ccwmap dep dep.bin ieee1394map inputmap isapnpmap ofmap pcimap seriomap symbols symbols.bin usbmap
|
||||
for i in alias alias.bin builtin.bin ccwmap dep dep.bin ieee1394map inputmap isapnpmap ofmap pcimap seriomap symbols symbols.bin usbmap
|
||||
do
|
||||
rm -f $RPM_BUILD_ROOT/lib/modules/$KernelVer/modules.$i
|
||||
done
|
||||
|
@ -1992,8 +2026,36 @@ fi
|
|||
# (__)\ )\/\
|
||||
# ||----w |
|
||||
# || ||
|
||||
|
||||
%changelog
|
||||
* Mon Jan 10 2011 Jarod Wilson <jarod@redhat.com>
|
||||
- Add support for local rebuild config option overrides
|
||||
- Add missing --with/--without pae build flag support
|
||||
|
||||
* Mon Jan 10 2011 Chuck Ebbert <cebbert@redhat.com> 2.6.36.3-14
|
||||
- Linux 2.6.36.3
|
||||
- Drop merged patches:
|
||||
posix-cpu-timers-workaround-to-suppress-problems-with-mt-exec.patch
|
||||
sched-cure-more-NO_HZ-load-average-woes.patch
|
||||
orinoco-initialise-priv_hw-before-assigning-the-interrupt.patch
|
||||
|
||||
* Sat Dec 18 2010 Kyle McMartin <kyle@redhat.com>
|
||||
- Patch from nhorman against f13:
|
||||
Enhance AF_PACKET to allow non-contiguous buffer alloc (#637619)
|
||||
|
||||
* Sat Dec 18 2010 Kyle McMartin <kyle@redhat.com>
|
||||
- Fix SELinux issues with NFS/btrfs and/or xfsdump. (#662344)
|
||||
|
||||
* Fri Dec 10 2010 Kyle McMartin <kyle@redhat.com>
|
||||
- pci-disable-aspm-if-bios-asks-us-to.patch: Patch from mjg59 to disable
|
||||
ASPM if the BIOS has disabled it, but enabled it already on some devices.
|
||||
|
||||
* Fri Dec 10 2010 Kyle McMartin <kyle@redhat.com>
|
||||
- Fix various issues mounting btrfs devices with subvolumes (#656465)
|
||||
|
||||
* Thu Dec 09 2010 Kyle McMartin <kyle@redhat.com> 2.6.36.2-13
|
||||
- Linux stable 2.6.36.2
|
||||
- Copy orinoco fix from F-14 so it doesn't get missed.
|
||||
|
||||
* Wed Dec 08 2010 Kyle McMartin <kyle@redhat.com> 2.6.36.2-12.rc1
|
||||
- Linux stable 2.6.36.2-rc1
|
||||
- Drop patches merged in stable series:
|
||||
|
|
|
@ -0,0 +1,254 @@
|
|||
Author: Neil Horman <nhorman@tuxdriver.com>
|
||||
Date: Fri Dec 17 13:35:36 2010 -0500
|
||||
|
||||
Enhance AF_PACKET to support using non-contiguous memory when allocating ring
|
||||
buffer space. This is a combined backport of the following commits from
|
||||
net-next-2.6:
|
||||
0e3125c755445664f00ad036e4fc2cd32fd52877
|
||||
bbce5a59e4e0e6e1dbc85492caaf310ff6611309
|
||||
0af55bb58f8fa7865004ac48d16affe125ac1b7f
|
||||
920b8d913bd3d963d5c88bca160a272b71e0c95a
|
||||
|
||||
diff -up linux-2.6.34.x86_64/net/packet/af_packet.c.orig linux-2.6.34.x86_64/net/packet/af_packet.c
|
||||
--- linux-2.6.34.x86_64/net/packet/af_packet.c.orig 2010-12-17 12:16:58.000000000 -0500
|
||||
+++ linux-2.6.34.x86_64/net/packet/af_packet.c 2010-12-17 12:30:14.000000000 -0500
|
||||
@@ -61,6 +61,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/slab.h>
|
||||
+#include <linux/vmalloc.h>
|
||||
#include <net/net_namespace.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/protocol.h>
|
||||
@@ -161,8 +162,14 @@ struct packet_mreq_max {
|
||||
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
|
||||
int closing, int tx_ring);
|
||||
|
||||
+#define PGV_FROM_VMALLOC 1
|
||||
+struct pgv {
|
||||
+ char *buffer;
|
||||
+ unsigned char flags;
|
||||
+};
|
||||
+
|
||||
struct packet_ring_buffer {
|
||||
- char **pg_vec;
|
||||
+ struct pgv *pg_vec;
|
||||
unsigned int head;
|
||||
unsigned int frames_per_block;
|
||||
unsigned int frame_size;
|
||||
@@ -214,6 +221,13 @@ struct packet_skb_cb {
|
||||
|
||||
#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
|
||||
|
||||
+static inline struct page *pgv_to_page(void *addr)
|
||||
+{
|
||||
+ if (is_vmalloc_addr(addr))
|
||||
+ return vmalloc_to_page(addr);
|
||||
+ return virt_to_page(addr);
|
||||
+}
|
||||
+
|
||||
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
|
||||
{
|
||||
union {
|
||||
@@ -226,11 +240,11 @@ static void __packet_set_status(struct p
|
||||
switch (po->tp_version) {
|
||||
case TPACKET_V1:
|
||||
h.h1->tp_status = status;
|
||||
- flush_dcache_page(virt_to_page(&h.h1->tp_status));
|
||||
+ flush_dcache_page(pgv_to_page(&h.h1->tp_status));
|
||||
break;
|
||||
case TPACKET_V2:
|
||||
h.h2->tp_status = status;
|
||||
- flush_dcache_page(virt_to_page(&h.h2->tp_status));
|
||||
+ flush_dcache_page(pgv_to_page(&h.h2->tp_status));
|
||||
break;
|
||||
default:
|
||||
pr_err("TPACKET version not supported\n");
|
||||
@@ -253,10 +267,10 @@ static int __packet_get_status(struct pa
|
||||
h.raw = frame;
|
||||
switch (po->tp_version) {
|
||||
case TPACKET_V1:
|
||||
- flush_dcache_page(virt_to_page(&h.h1->tp_status));
|
||||
+ flush_dcache_page(pgv_to_page(&h.h1->tp_status));
|
||||
return h.h1->tp_status;
|
||||
case TPACKET_V2:
|
||||
- flush_dcache_page(virt_to_page(&h.h2->tp_status));
|
||||
+ flush_dcache_page(pgv_to_page(&h.h2->tp_status));
|
||||
return h.h2->tp_status;
|
||||
default:
|
||||
pr_err("TPACKET version not supported\n");
|
||||
@@ -280,7 +294,8 @@ static void *packet_lookup_frame(struct
|
||||
pg_vec_pos = position / rb->frames_per_block;
|
||||
frame_offset = position % rb->frames_per_block;
|
||||
|
||||
- h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
|
||||
+ h.raw = rb->pg_vec[pg_vec_pos].buffer +
|
||||
+ (frame_offset * rb->frame_size);
|
||||
|
||||
if (status != __packet_get_status(po, h.raw))
|
||||
return NULL;
|
||||
@@ -771,15 +786,11 @@ static int tpacket_rcv(struct sk_buff *s
|
||||
__packet_set_status(po, h.raw, status);
|
||||
smp_mb();
|
||||
{
|
||||
- struct page *p_start, *p_end;
|
||||
- u8 *h_end = h.raw + macoff + snaplen - 1;
|
||||
+ u8 *start, *end;
|
||||
|
||||
- p_start = virt_to_page(h.raw);
|
||||
- p_end = virt_to_page(h_end);
|
||||
- while (p_start <= p_end) {
|
||||
- flush_dcache_page(p_start);
|
||||
- p_start++;
|
||||
- }
|
||||
+ end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
|
||||
+ for (start = h.raw; start < end; start += PAGE_SIZE)
|
||||
+ flush_dcache_page(pgv_to_page(start));
|
||||
}
|
||||
|
||||
sk->sk_data_ready(sk, 0);
|
||||
@@ -886,7 +897,6 @@ static int tpacket_fill_skb(struct packe
|
||||
}
|
||||
|
||||
err = -EFAULT;
|
||||
- page = virt_to_page(data);
|
||||
offset = offset_in_page(data);
|
||||
len_max = PAGE_SIZE - offset;
|
||||
len = ((to_write > len_max) ? len_max : to_write);
|
||||
@@ -905,11 +915,11 @@ static int tpacket_fill_skb(struct packe
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
+ page = pgv_to_page(data);
|
||||
+ data += len;
|
||||
flush_dcache_page(page);
|
||||
get_page(page);
|
||||
- skb_fill_page_desc(skb,
|
||||
- nr_frags,
|
||||
- page++, offset, len);
|
||||
+ skb_fill_page_desc(skb, nr_frags, page, offset, len);
|
||||
to_write -= len;
|
||||
offset = 0;
|
||||
len_max = PAGE_SIZE;
|
||||
@@ -2230,37 +2240,76 @@ static const struct vm_operations_struct
|
||||
.close = packet_mm_close,
|
||||
};
|
||||
|
||||
-static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
|
||||
+static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
|
||||
+ unsigned int len)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
- if (likely(pg_vec[i]))
|
||||
- free_pages((unsigned long) pg_vec[i], order);
|
||||
+ if (likely(pg_vec[i].buffer)) {
|
||||
+ if (pg_vec[i].flags & PGV_FROM_VMALLOC)
|
||||
+ vfree(pg_vec[i].buffer);
|
||||
+ else
|
||||
+ free_pages((unsigned long)pg_vec[i].buffer,
|
||||
+ order);
|
||||
+ pg_vec[i].buffer = NULL;
|
||||
+ }
|
||||
}
|
||||
kfree(pg_vec);
|
||||
}
|
||||
|
||||
-static inline char *alloc_one_pg_vec_page(unsigned long order)
|
||||
+static inline char *alloc_one_pg_vec_page(unsigned long order,
|
||||
+ unsigned char *flags)
|
||||
{
|
||||
- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
|
||||
+ char *buffer = NULL;
|
||||
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
|
||||
+ __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
|
||||
+
|
||||
+ buffer = (char *) __get_free_pages(gfp_flags, order);
|
||||
|
||||
- return (char *) __get_free_pages(gfp_flags, order);
|
||||
+ if (buffer)
|
||||
+ return buffer;
|
||||
+
|
||||
+ /*
|
||||
+ * __get_free_pages failed, fall back to vmalloc
|
||||
+ */
|
||||
+ *flags |= PGV_FROM_VMALLOC;
|
||||
+ buffer = vmalloc((1 << order) * PAGE_SIZE);
|
||||
+
|
||||
+ if (buffer) {
|
||||
+ memset(buffer, 0, (1 << order) * PAGE_SIZE);
|
||||
+ return buffer;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * vmalloc failed, lets dig into swap here
|
||||
+ */
|
||||
+ *flags = 0;
|
||||
+ gfp_flags &= ~__GFP_NORETRY;
|
||||
+ buffer = (char *)__get_free_pages(gfp_flags, order);
|
||||
+ if (buffer)
|
||||
+ return buffer;
|
||||
+
|
||||
+ /*
|
||||
+ * complete and utter failure
|
||||
+ */
|
||||
+ return NULL;
|
||||
}
|
||||
|
||||
-static char **alloc_pg_vec(struct tpacket_req *req, int order)
|
||||
+static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
|
||||
{
|
||||
unsigned int block_nr = req->tp_block_nr;
|
||||
- char **pg_vec;
|
||||
+ struct pgv *pg_vec;
|
||||
int i;
|
||||
|
||||
- pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
|
||||
+ pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
|
||||
if (unlikely(!pg_vec))
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < block_nr; i++) {
|
||||
- pg_vec[i] = alloc_one_pg_vec_page(order);
|
||||
- if (unlikely(!pg_vec[i]))
|
||||
+ pg_vec[i].buffer = alloc_one_pg_vec_page(order,
|
||||
+ &pg_vec[i].flags);
|
||||
+ if (unlikely(!pg_vec[i].buffer))
|
||||
goto out_free_pgvec;
|
||||
}
|
||||
|
||||
@@ -2276,7 +2325,7 @@ out_free_pgvec:
|
||||
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
|
||||
int closing, int tx_ring)
|
||||
{
|
||||
- char **pg_vec = NULL;
|
||||
+ struct pgv *pg_vec = NULL;
|
||||
struct packet_sock *po = pkt_sk(sk);
|
||||
int was_running, order = 0;
|
||||
struct packet_ring_buffer *rb;
|
||||
@@ -2438,15 +2487,22 @@ static int packet_mmap(struct file *file
|
||||
continue;
|
||||
|
||||
for (i = 0; i < rb->pg_vec_len; i++) {
|
||||
- struct page *page = virt_to_page(rb->pg_vec[i]);
|
||||
+ struct page *page;
|
||||
+ void *kaddr = rb->pg_vec[i].buffer;
|
||||
int pg_num;
|
||||
|
||||
for (pg_num = 0; pg_num < rb->pg_vec_pages;
|
||||
- pg_num++, page++) {
|
||||
+ pg_num++) {
|
||||
+ if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC)
|
||||
+ page = vmalloc_to_page(kaddr);
|
||||
+ else
|
||||
+ page = virt_to_page(kaddr);
|
||||
+
|
||||
err = vm_insert_page(vma, start, page);
|
||||
if (unlikely(err))
|
||||
goto out;
|
||||
start += PAGE_SIZE;
|
||||
+ kaddr += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,144 @@
|
|||
From linux-kernel-owner@vger.kernel.org Mon Dec 6 14:01:17 2010
|
||||
From: Matthew Garrett <mjg@redhat.com>
|
||||
To: linux-pci@vger.kernel.org
|
||||
Cc: linux-kernel@vger.kernel.org, jbarnes@virtuousgeek.org,
|
||||
Matthew Garrett <mjg@redhat.com>
|
||||
Subject: [PATCH v2] PCI: Disable ASPM if BIOS asks us to
|
||||
Date: Mon, 6 Dec 2010 14:00:56 -0500
|
||||
Message-Id: <1291662056-6055-1-git-send-email-mjg@redhat.com>
|
||||
|
||||
We currently refuse to touch the ASPM registers if the BIOS tells us that
|
||||
ASPM isn't supported. This can cause problems if the BIOS has (for any
|
||||
reason) enabled ASPM on some devices anyway. Change the code such that we
|
||||
explicitly clear ASPM if the FADT indicates that ASPM isn't supported,
|
||||
and make sure we tidy up appropriately on device removal in order to deal
|
||||
with the hotplug case. If ASPM is disabled because the BIOS doesn't hand
|
||||
over control then we won't touch the registers.
|
||||
|
||||
Signed-off-by: Matthew Garrett <mjg@redhat.com>
|
||||
---
|
||||
|
||||
Implement Rafael's suggestion to use two separate functions, and also
|
||||
ensure that we clear the clkpm bit as well as the ASPM bits.
|
||||
|
||||
drivers/pci/pci-acpi.c | 1 +
|
||||
drivers/pci/pcie/aspm.c | 21 +++++++++++++++++----
|
||||
include/linux/pci-aspm.h | 5 ++++-
|
||||
3 files changed, 22 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
|
||||
index 24e19c5..d7ea699 100644
|
||||
--- a/drivers/pci/pci-acpi.c
|
||||
+++ b/drivers/pci/pci-acpi.c
|
||||
@@ -399,6 +399,7 @@ static int __init acpi_pci_init(void)
|
||||
|
||||
if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
|
||||
printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
|
||||
+ pcie_clear_aspm();
|
||||
pcie_no_aspm();
|
||||
}
|
||||
|
||||
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
|
||||
index 7122281..8112415 100644
|
||||
--- a/drivers/pci/pcie/aspm.c
|
||||
+++ b/drivers/pci/pcie/aspm.c
|
||||
@@ -68,7 +68,7 @@ struct pcie_link_state {
|
||||
struct aspm_latency acceptable[8];
|
||||
};
|
||||
|
||||
-static int aspm_disabled, aspm_force;
|
||||
+static int aspm_disabled, aspm_force, aspm_clear_state;
|
||||
static DEFINE_MUTEX(aspm_lock);
|
||||
static LIST_HEAD(link_list);
|
||||
|
||||
@@ -139,7 +139,7 @@ static void pcie_set_clkpm(struct pcie_link_state *link, int enable)
|
||||
{
|
||||
/* Don't enable Clock PM if the link is not Clock PM capable */
|
||||
if (!link->clkpm_capable && enable)
|
||||
- return;
|
||||
+ enable = 0;
|
||||
/* Need nothing if the specified equals to current state */
|
||||
if (link->clkpm_enabled == enable)
|
||||
return;
|
||||
@@ -498,6 +498,10 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
|
||||
struct pci_dev *child;
|
||||
int pos;
|
||||
u32 reg32;
|
||||
+
|
||||
+ if (aspm_clear_state)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
/*
|
||||
* Some functions in a slot might not all be PCIe functions,
|
||||
* very strange. Disable ASPM for the whole slot
|
||||
@@ -563,12 +567,15 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
|
||||
struct pcie_link_state *link;
|
||||
int blacklist = !!pcie_aspm_sanity_check(pdev);
|
||||
|
||||
- if (aspm_disabled || !pci_is_pcie(pdev) || pdev->link_state)
|
||||
+ if (!pci_is_pcie(pdev) || pdev->link_state)
|
||||
return;
|
||||
if (pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT &&
|
||||
pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
|
||||
return;
|
||||
|
||||
+ if (aspm_disabled && !aspm_clear_state)
|
||||
+ return;
|
||||
+
|
||||
/* VIA has a strange chipset, root port is under a bridge */
|
||||
if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
|
||||
pdev->bus->self)
|
||||
@@ -641,7 +648,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
|
||||
struct pci_dev *parent = pdev->bus->self;
|
||||
struct pcie_link_state *link, *root, *parent_link;
|
||||
|
||||
- if (aspm_disabled || !pci_is_pcie(pdev) ||
|
||||
+ if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) ||
|
||||
!parent || !parent->link_state)
|
||||
return;
|
||||
if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
|
||||
@@ -899,6 +906,12 @@ static int __init pcie_aspm_disable(char *str)
|
||||
|
||||
__setup("pcie_aspm=", pcie_aspm_disable);
|
||||
|
||||
+void pcie_clear_aspm(void)
|
||||
+{
|
||||
+ if (!aspm_force)
|
||||
+ aspm_clear_state = 1;
|
||||
+}
|
||||
+
|
||||
void pcie_no_aspm(void)
|
||||
{
|
||||
if (!aspm_force)
|
||||
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
|
||||
index 91ba0b3..ce68105 100644
|
||||
--- a/include/linux/pci-aspm.h
|
||||
+++ b/include/linux/pci-aspm.h
|
||||
@@ -27,6 +27,7 @@ extern void pcie_aspm_init_link_state(struct pci_dev *pdev);
|
||||
extern void pcie_aspm_exit_link_state(struct pci_dev *pdev);
|
||||
extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
|
||||
extern void pci_disable_link_state(struct pci_dev *pdev, int state);
|
||||
+extern void pcie_clear_aspm(void);
|
||||
extern void pcie_no_aspm(void);
|
||||
#else
|
||||
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
|
||||
@@ -41,7 +42,9 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev)
|
||||
static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
|
||||
{
|
||||
}
|
||||
-
|
||||
+static inline void pcie_clear_aspm(void)
|
||||
+{
|
||||
+}
|
||||
static inline void pcie_no_aspm(void)
|
||||
{
|
||||
}
|
||||
--
|
||||
1.7.3.2
|
||||
|
||||
--
|
||||
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
|
||||
the body of a message to majordomo@vger.kernel.org
|
||||
More majordomo info at http://vger.kernel.org/majordomo-info.html
|
||||
Please read the FAQ at http://www.tux.org/lkml/
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
From 78dfb59b62d1a7735cd28fc2783e58c122954fae Mon Sep 17 00:00:00 2001
|
||||
From: Oleg Nesterov <oleg@redhat.com>
|
||||
Date: Fri, 5 Nov 2010 16:53:42 +0100
|
||||
Subject: posix-cpu-timers: workaround to suppress the problems with mt exec
|
||||
|
||||
posix-cpu-timers.c correctly assumes that the dying process does
|
||||
posix_cpu_timers_exit_group() and removes all !CPUCLOCK_PERTHREAD
|
||||
timers from signal->cpu_timers list.
|
||||
|
||||
But, it also assumes that timer->it.cpu.task is always the group
|
||||
leader, and thus the dead ->task means the dead thread group.
|
||||
|
||||
This is obviously not true after de_thread() changes the leader.
|
||||
After that almost every posix_cpu_timer_ method has problems.
|
||||
|
||||
It is not simple to fix this bug correctly. First of all, I think
|
||||
that timer->it.cpu should use struct pid instead of task_struct.
|
||||
Also, the locking should be reworked completely. In particular,
|
||||
tasklist_lock should not be used at all. This all needs a lot of
|
||||
nontrivial and hard-to-test changes.
|
||||
|
||||
Change __exit_signal() to do posix_cpu_timers_exit_group() when
|
||||
the old leader dies during exec. This is not the fix, just the
|
||||
temporary hack to hide the problem for 2.6.37 and stable. IOW,
|
||||
this is obviously wrong but this is what we currently have anyway:
|
||||
cpu timers do not work after mt exec.
|
||||
|
||||
In theory this change adds another race. The exiting leader can
|
||||
detach the timers which were attached to the new leader. However,
|
||||
the window between de_thread() and release_task() is small, we
|
||||
can pretend that sys_timer_create() was called before de_thread().
|
||||
|
||||
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
---
|
||||
kernel/exit.c | 8 ++++++++
|
||||
1 files changed, 8 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/kernel/exit.c b/kernel/exit.c
|
||||
index 0312022..1eff9e4 100644
|
||||
--- a/kernel/exit.c
|
||||
+++ b/kernel/exit.c
|
||||
@@ -95,6 +95,14 @@ static void __exit_signal(struct task_struct *tsk)
|
||||
sig->tty = NULL;
|
||||
} else {
|
||||
/*
|
||||
+ * This can only happen if the caller is de_thread().
|
||||
+ * FIXME: this is the temporary hack, we should teach
|
||||
+ * posix-cpu-timers to handle this case correctly.
|
||||
+ */
|
||||
+ if (unlikely(has_group_leader_pid(tsk)))
|
||||
+ posix_cpu_timers_exit_group(tsk);
|
||||
+
|
||||
+ /*
|
||||
* If there is any task waiting for the group exit
|
||||
* then notify it:
|
||||
*/
|
||||
--
|
||||
1.7.3.2
|
||||
|
|
@ -1,273 +0,0 @@
|
|||
From bounces.tip@hpa.at.zytor.com Wed Dec 8 15:40:48 2010
|
||||
From: tip-bot for Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
In-Reply-To: <1291129145.32004.874.camel@laptop>
|
||||
References: <1291129145.32004.874.camel@laptop>
|
||||
Subject: [tip:sched/urgent] sched: Cure more NO_HZ load average woes
|
||||
Message-ID: <tip-0f004f5a696a9434b7214d0d3cbd0525ee77d428@git.kernel.org>
|
||||
Git-Commit-ID: 0f004f5a696a9434b7214d0d3cbd0525ee77d428
|
||||
|
||||
Commit-ID: 0f004f5a696a9434b7214d0d3cbd0525ee77d428
|
||||
Gitweb: http://git.kernel.org/tip/0f004f5a696a9434b7214d0d3cbd0525ee77d428
|
||||
Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
AuthorDate: Tue, 30 Nov 2010 19:48:45 +0100
|
||||
Committer: Ingo Molnar <mingo@elte.hu>
|
||||
CommitDate: Wed, 8 Dec 2010 20:15:04 +0100
|
||||
|
||||
sched: Cure more NO_HZ load average woes
|
||||
|
||||
There's a long-running regression that proved difficult to fix and
|
||||
which is hitting certain people and is rather annoying in its effects.
|
||||
|
||||
Damien reported that after 74f5187ac8 (sched: Cure load average vs
|
||||
NO_HZ woes) his load average is unnaturally high, he also noted that
|
||||
even with that patch reverted the load average numbers are not
|
||||
correct.
|
||||
|
||||
The problem is that the previous patch only solved half the NO_HZ
|
||||
problem, it addressed the part of going into NO_HZ mode, not of
|
||||
coming out of NO_HZ mode. This patch implements that missing half.
|
||||
|
||||
When coming out of NO_HZ mode there are two important things to take
|
||||
care of:
|
||||
|
||||
- Folding the pending idle delta into the global active count.
|
||||
- Correctly aging the averages for the idle-duration.
|
||||
|
||||
So with this patch the NO_HZ interaction should be complete and
|
||||
behaviour between CONFIG_NO_HZ=[yn] should be equivalent.
|
||||
|
||||
Furthermore, this patch slightly changes the load average computation
|
||||
by adding a rounding term to the fixed point multiplication.
|
||||
|
||||
Reported-by: Damien Wyart <damien.wyart@free.fr>
|
||||
Reported-by: Tim McGrath <tmhikaru@gmail.com>
|
||||
Tested-by: Damien Wyart <damien.wyart@free.fr>
|
||||
Tested-by: Orion Poplawski <orion@cora.nwra.com>
|
||||
Tested-by: Kyle McMartin <kyle@mcmartin.ca>
|
||||
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||||
Cc: stable@kernel.org
|
||||
Cc: Chase Douglas <chase.douglas@canonical.com>
|
||||
LKML-Reference: <1291129145.32004.874.camel@laptop>
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
---
|
||||
include/linux/sched.h | 2 +-
|
||||
kernel/sched.c | 150 +++++++++++++++++++++++++++++++++++++++++++++----
|
||||
kernel/timer.c | 2 +-
|
||||
3 files changed, 141 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
||||
index 2c79e92..2238745 100644
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -143,7 +143,7 @@ extern unsigned long nr_iowait_cpu(int cpu);
|
||||
extern unsigned long this_cpu_load(void);
|
||||
|
||||
|
||||
-extern void calc_global_load(void);
|
||||
+extern void calc_global_load(unsigned long ticks);
|
||||
|
||||
extern unsigned long get_parent_ip(unsigned long addr);
|
||||
|
||||
diff --git a/kernel/sched.c b/kernel/sched.c
|
||||
index dc91a4d..6b7c26a 100644
|
||||
--- a/kernel/sched.c
|
||||
+++ b/kernel/sched.c
|
||||
@@ -3119,6 +3119,15 @@ static long calc_load_fold_active(struct rq *this_rq)
|
||||
return delta;
|
||||
}
|
||||
|
||||
+static unsigned long
|
||||
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||
+{
|
||||
+ load *= exp;
|
||||
+ load += active * (FIXED_1 - exp);
|
||||
+ load += 1UL << (FSHIFT - 1);
|
||||
+ return load >> FSHIFT;
|
||||
+}
|
||||
+
|
||||
#ifdef CONFIG_NO_HZ
|
||||
/*
|
||||
* For NO_HZ we delay the active fold to the next LOAD_FREQ update.
|
||||
@@ -3148,6 +3157,128 @@ static long calc_load_fold_idle(void)
|
||||
|
||||
return delta;
|
||||
}
|
||||
+
|
||||
+/**
|
||||
+ * fixed_power_int - compute: x^n, in O(log n) time
|
||||
+ *
|
||||
+ * @x: base of the power
|
||||
+ * @frac_bits: fractional bits of @x
|
||||
+ * @n: power to raise @x to.
|
||||
+ *
|
||||
+ * By exploiting the relation between the definition of the natural power
|
||||
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
|
||||
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
|
||||
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
|
||||
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
|
||||
+ * of course trivially computable in O(log_2 n), the length of our binary
|
||||
+ * vector.
|
||||
+ */
|
||||
+static unsigned long
|
||||
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
|
||||
+{
|
||||
+ unsigned long result = 1UL << frac_bits;
|
||||
+
|
||||
+ if (n) for (;;) {
|
||||
+ if (n & 1) {
|
||||
+ result *= x;
|
||||
+ result += 1UL << (frac_bits - 1);
|
||||
+ result >>= frac_bits;
|
||||
+ }
|
||||
+ n >>= 1;
|
||||
+ if (!n)
|
||||
+ break;
|
||||
+ x *= x;
|
||||
+ x += 1UL << (frac_bits - 1);
|
||||
+ x >>= frac_bits;
|
||||
+ }
|
||||
+
|
||||
+ return result;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * a1 = a0 * e + a * (1 - e)
|
||||
+ *
|
||||
+ * a2 = a1 * e + a * (1 - e)
|
||||
+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e)
|
||||
+ * = a0 * e^2 + a * (1 - e) * (1 + e)
|
||||
+ *
|
||||
+ * a3 = a2 * e + a * (1 - e)
|
||||
+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
|
||||
+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
|
||||
+ *
|
||||
+ * ...
|
||||
+ *
|
||||
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
|
||||
+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
|
||||
+ * = a0 * e^n + a * (1 - e^n)
|
||||
+ *
|
||||
+ * [1] application of the geometric series:
|
||||
+ *
|
||||
+ * n 1 - x^(n+1)
|
||||
+ * S_n := \Sum x^i = -------------
|
||||
+ * i=0 1 - x
|
||||
+ */
|
||||
+static unsigned long
|
||||
+calc_load_n(unsigned long load, unsigned long exp,
|
||||
+ unsigned long active, unsigned int n)
|
||||
+{
|
||||
+
|
||||
+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * NO_HZ can leave us missing all per-cpu ticks calling
|
||||
+ * calc_load_account_active(), but since an idle CPU folds its delta into
|
||||
+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
|
||||
+ * in the pending idle delta if our idle period crossed a load cycle boundary.
|
||||
+ *
|
||||
+ * Once we've updated the global active value, we need to apply the exponential
|
||||
+ * weights adjusted to the number of cycles missed.
|
||||
+ */
|
||||
+static void calc_global_nohz(unsigned long ticks)
|
||||
+{
|
||||
+ long delta, active, n;
|
||||
+
|
||||
+ if (time_before(jiffies, calc_load_update))
|
||||
+ return;
|
||||
+
|
||||
+ /*
|
||||
+ * If we crossed a calc_load_update boundary, make sure to fold
|
||||
+ * any pending idle changes, the respective CPUs might have
|
||||
+ * missed the tick driven calc_load_account_active() update
|
||||
+ * due to NO_HZ.
|
||||
+ */
|
||||
+ delta = calc_load_fold_idle();
|
||||
+ if (delta)
|
||||
+ atomic_long_add(delta, &calc_load_tasks);
|
||||
+
|
||||
+ /*
|
||||
+ * If we were idle for multiple load cycles, apply them.
|
||||
+ */
|
||||
+ if (ticks >= LOAD_FREQ) {
|
||||
+ n = ticks / LOAD_FREQ;
|
||||
+
|
||||
+ active = atomic_long_read(&calc_load_tasks);
|
||||
+ active = active > 0 ? active * FIXED_1 : 0;
|
||||
+
|
||||
+ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
|
||||
+ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
|
||||
+ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
|
||||
+
|
||||
+ calc_load_update += n * LOAD_FREQ;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Its possible the remainder of the above division also crosses
|
||||
+ * a LOAD_FREQ period, the regular check in calc_global_load()
|
||||
+ * which comes after this will take care of that.
|
||||
+ *
|
||||
+ * Consider us being 11 ticks before a cycle completion, and us
|
||||
+ * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
|
||||
+ * age us 4 cycles, and the test in calc_global_load() will
|
||||
+ * pick up the final one.
|
||||
+ */
|
||||
+}
|
||||
#else
|
||||
static void calc_load_account_idle(struct rq *this_rq)
|
||||
{
|
||||
@@ -3157,6 +3288,10 @@ static inline long calc_load_fold_idle(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
+
|
||||
+static void calc_global_nohz(unsigned long ticks)
|
||||
+{
|
||||
+}
|
||||
#endif
|
||||
|
||||
/**
|
||||
@@ -3174,24 +3309,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
|
||||
loads[2] = (avenrun[2] + offset) << shift;
|
||||
}
|
||||
|
||||
-static unsigned long
|
||||
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
|
||||
-{
|
||||
- load *= exp;
|
||||
- load += active * (FIXED_1 - exp);
|
||||
- return load >> FSHIFT;
|
||||
-}
|
||||
-
|
||||
/*
|
||||
* calc_load - update the avenrun load estimates 10 ticks after the
|
||||
* CPUs have updated calc_load_tasks.
|
||||
*/
|
||||
-void calc_global_load(void)
|
||||
+void calc_global_load(unsigned long ticks)
|
||||
{
|
||||
- unsigned long upd = calc_load_update + 10;
|
||||
long active;
|
||||
|
||||
- if (time_before(jiffies, upd))
|
||||
+ calc_global_nohz(ticks);
|
||||
+
|
||||
+ if (time_before(jiffies, calc_load_update + 10))
|
||||
return;
|
||||
|
||||
active = atomic_long_read(&calc_load_tasks);
|
||||
diff --git a/kernel/timer.c b/kernel/timer.c
|
||||
index 68a9ae7..7bd715f 100644
|
||||
--- a/kernel/timer.c
|
||||
+++ b/kernel/timer.c
|
||||
@@ -1319,7 +1319,7 @@ void do_timer(unsigned long ticks)
|
||||
{
|
||||
jiffies_64 += ticks;
|
||||
update_wall_time();
|
||||
- calc_global_load();
|
||||
+ calc_global_load(ticks);
|
||||
}
|
||||
|
||||
#ifdef __ARCH_WANT_SYS_ALARM
|
||||
|
Loading…
Reference in New Issue