Linux 2.6.35.13

This commit is contained in:
Chuck Ebbert 2011-04-28 12:40:29 -04:00
parent c6f2ed6008
commit 9e4696b4a4
8 changed files with 8 additions and 807 deletions

View File

@ -34,8 +34,8 @@ index eac44e4..320e798 100644
+ /* Apple MacBookPro6,2 */
+ { USB_DEVICE(0x05ac, 0x8218) },
+
/* AVM BlueFRITZ! USB v2.0 */
{ USB_DEVICE(0x057c, 0x3800) },
/* Apple MacBookPro8,2 */
{ USB_DEVICE(0x05ac, 0x821a) },
--
1.7.2.3

View File

@ -1,105 +0,0 @@
From 56e0d4414e5eeacd9eaf68ce93dcb80f9c62bfb4 Mon Sep 17 00:00:00 2001
From: Nelson Elhage <nelhage@ksplice.com>
Date: Wed, 3 Nov 2010 16:35:41 +0000
Subject: inet_diag: Make sure we actually run the same bytecode we audited.
We were using nlmsg_find_attr() to look up the bytecode by attribute when
auditing, but then just using the first attribute when actually running
bytecode. So, if we received a message with two attribute elements, where only
the second had type INET_DIAG_REQ_BYTECODE, we would validate and run different
bytecode strings.
Fix this by consistently using nlmsg_find_attr everywhere.
Signed-off-by: Nelson Elhage <nelhage@ksplice.com>
Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/ipv4/inet_diag.c | 27 ++++++++++++++++-----------
1 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e5fa2dd..7403b9b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -490,9 +490,11 @@ static int inet_csk_diag_dump(struct sock *sk,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
struct inet_sock *inet = inet_sk(sk);
entry.family = sk->sk_family;
@@ -512,7 +514,7 @@ static int inet_csk_diag_dump(struct sock *sk,
entry.dport = ntohs(inet->inet_dport);
entry.userlocks = sk->sk_userlocks;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -527,9 +529,11 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
{
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
struct inet_diag_entry entry;
- struct rtattr *bc = (struct rtattr *)(r + 1);
+ const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
+ sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.family = tw->tw_family;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -548,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
entry.dport = ntohs(tw->tw_dport);
entry.userlocks = 0;
- if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc), nla_len(bc), &entry))
return 0;
}
@@ -618,7 +622,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt;
- struct rtattr *bc = NULL;
+ const struct nlattr *bc = NULL;
struct inet_sock *inet = inet_sk(sk);
int j, s_j;
int reqnum, s_reqnum;
@@ -638,8 +642,9 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (!lopt || !lopt->qlen)
goto out;
- if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
- bc = (struct rtattr *)(r + 1);
+ if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
+ INET_DIAG_REQ_BYTECODE);
entry.sport = inet->inet_num;
entry.userlocks = sk->sk_userlocks;
}
@@ -672,8 +677,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
&ireq->rmt_addr;
entry.dport = ntohs(ireq->rmt_port);
- if (!inet_diag_bc_run(RTA_DATA(bc),
- RTA_PAYLOAD(bc), &entry))
+ if (!inet_diag_bc_run(nla_data(bc),
+ nla_len(bc), &entry))
continue;
}
--
1.7.3.2

View File

@ -1,38 +0,0 @@
From kernel-bounces@lists.fedoraproject.org Fri Apr 22 08:39:51 2011
From: Stanislaw Gruszka <sgruszka@redhat.com>
To: kernel@lists.fedoraproject.org, stable@kernel.org
Subject: [PATCH stable v2] iwlagn: Support new 5000 microcode.
Date: Fri, 22 Apr 2011 14:38:50 +0200
Message-Id: <1303475930-3856-1-git-send-email-sgruszka@redhat.com>
From: Don Fry <donald.h.fry@intel.com>
commit 41504cce240f791f1e16561db95728c5537fbad9 upstream.
New iwlwifi-5000 microcode requires driver support for API version 5.
Signed-off-by: Don Fry <donald.h.fry@intel.com>
Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
Resolves:
https://bugzilla.redhat.com/show_bug.cgi?id=695712
drivers/net/wireless/iwlwifi/iwl-5000.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index a28af7e..0a67558 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -51,7 +51,7 @@
#include "iwl-agn-debugfs.h"
/* Highest firmware API version supported */
-#define IWL5000_UCODE_API_MAX 2
+#define IWL5000_UCODE_API_MAX 5
#define IWL5150_UCODE_API_MAX 2
/* Lowest firmware API version supported */
--
1.7.1

View File

@ -48,7 +48,7 @@ Summary: The Linux kernel
# reset this by hand to 1 (or to 0 and then use rpmdev-bumpspec).
# scripts/rebase.sh should be made to do that for you, actually.
#
%global baserelease 90
%global baserelease 91
%global fedora_build %{baserelease}
# base_sublevel is the kernel version we're starting with and patching
@ -60,7 +60,7 @@ Summary: The Linux kernel
%if 0%{?released_kernel}
# Do we have a -stable update to apply?
%define stable_update 12
%define stable_update 13
# Is it a -stable RC?
%define stable_rc 0
# Set rpm version accordingly
@ -670,7 +670,6 @@ Patch580: linux-2.6-sparc-selinux-mprotect-checks.patch
Patch610: hda_intel-prealloc-4mb-dmabuffer.patch
Patch700: linux-2.6-e1000-ich9-montevina.patch
Patch701: iwlagn-support-new-5000-microcode.patch
Patch800: linux-2.6-crash-driver.patch
@ -785,16 +784,12 @@ Patch13645: tpm-autodetect-itpm-devices.patch
Patch13652: fix-i8k-inline-asm.patch
Patch13653: inet_diag-make-sure-we-run-the-same-bytecode-we-audited.patch
Patch13654: netlink-make-nlmsg_find_attr-take-a-const-ptr.patch
Patch13660: rtl8180-improve-signal-reporting-for-rtl8185-hardware.patch
Patch13661: rtl8180-improve-signal-reporting-for-actual-rtl8180-hardware.patch
Patch13684: tty-make-tiocgicount-a-handler.patch
Patch13685: tty-icount-changeover-for-other-main-devices.patch
Patch13690: mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch
Patch13691: mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds.patch
Patch13692: orinoco-initialise-priv_hw-before-assigning-the-interrupt.patch
@ -1367,9 +1362,6 @@ ApplyPatch hda_intel-prealloc-4mb-dmabuffer.patch
# Networking
# rhbz#695712
ApplyPatch iwlagn-support-new-5000-microcode.patch
# Misc fixes
# The input layer spews crap no-one cares about.
ApplyPatch linux-2.6-input-kill-stupid-messages.patch
@ -1528,10 +1520,6 @@ ApplyPatch tpm-autodetect-itpm-devices.patch
ApplyPatch fix-i8k-inline-asm.patch
# rhbz#651264 (CVE-2010-3880)
ApplyPatch inet_diag-make-sure-we-run-the-same-bytecode-we-audited.patch
ApplyPatch netlink-make-nlmsg_find_attr-take-a-const-ptr.patch
ApplyPatch rtl8180-improve-signal-reporting-for-rtl8185-hardware.patch
ApplyPatch rtl8180-improve-signal-reporting-for-actual-rtl8180-hardware.patch
@ -1540,7 +1528,6 @@ ApplyPatch tty-make-tiocgicount-a-handler.patch
ApplyPatch tty-icount-changeover-for-other-main-devices.patch
# backport some fixes for kswapd from mmotm, rhbz#649694
ApplyPatch mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch
ApplyPatch mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds.patch
# rhbz#657864
@ -2162,6 +2149,9 @@ fi
# and build.
%changelog
* Thu Apr 29 2011 Chuck Ebbert <cebbert@redhat.com>
- Linux 2.6.35.13
* Fri Apr 22 2011 Kyle McMartin <kmcmartin@redhat.com> 2.6.35.12-90
- iwlagn-support-new-5000-microcode.patch: stable submission patch from
sgruszka to support newer microcode versions with the iwl5000 hardware.

View File

@ -1,230 +1,3 @@
From foo
From: David Howells <dhowells@redhat.com>
Subject: Fix cred leak in AF_NETLINK
Patch cab9e9848b9a8283b0504a2d7c435a9f5ba026de to the 2.6.35.y stable tree
stored a ref to the current cred struct in struct scm_cookie. This was fine
with AF_UNIX as that calls scm_destroy() from its packet sending functions, but
AF_NETLINK, which also uses scm_send(), does not call scm_destroy() - meaning
that the copied credentials leak each time SCM data is sent over a netlink
socket.
This can be triggered quite simply on a Fedora 13 or 14 userspace with the
2.6.35.11 kernel (or something based off of that) by calling:
#!/bin/bash
for ((i=0; i<100; i++))
do
su - -c /bin/true
cut -d: -f1 /proc/slabinfo | grep 'cred\|key\|task_struct'
cat /proc/keys | wc -l
done
This leaks the session key that pam_keyinit creates for 'su -', which appears
in /proc/keys as being revoked (has the R flag set against it) afterward su is
called.
Furthermore, if CONFIG_SLAB=y, then the cred and key slab object usage counts
can be viewed and seen to increase. The key slab increases by one object per
loop, and this can be seen after the system has had a couple of minutes to
stand after the script above has been run on it.
If the system is working correctly, the key and cred counts should return to
roughly what they were before.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
net/netlink/af_netlink.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
Index: linux-2.6.35.y/net/netlink/af_netlink.c
===================================================================
--- linux-2.6.35.y.orig/net/netlink/af_netlink.c 2011-03-29 22:52:05.032059161 -0700
+++ linux-2.6.35.y/net/netlink/af_netlink.c 2011-03-29 23:53:42.295455441 -0700
@@ -1330,12 +1330,16 @@
return err;
if (msg->msg_namelen) {
- if (addr->nl_family != AF_NETLINK)
- return -EINVAL;
+ if (addr->nl_family != AF_NETLINK) {
+ err = -EINVAL;
+ goto out;
+ }
dst_pid = addr->nl_pid;
dst_group = ffs(addr->nl_groups);
- if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
- return -EPERM;
+ if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) {
+ err = -EPERM;
+ goto out;
+ }
} else {
dst_pid = nlk->dst_pid;
dst_group = nlk->dst_group;
@@ -1387,6 +1391,8 @@
err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
out:
+ scm_destroy(siocb->scm);
+ siocb->scm = NULL;
return err;
}
From c4ff4b829ef9e6353c0b133b7adb564a68054979 Mon Sep 17 00:00:00 2001
From: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
Date: Fri, 12 Nov 2010 22:30:02 +0100
Subject: TPM: Long default timeout fix
From: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
commit c4ff4b829ef9e6353c0b133b7adb564a68054979 upstream.
If duration variable value is 0 at this point, it's because
chip->vendor.duration wasn't filled by tpm_get_timeouts() yet.
This patch sets then the lowest timeout just to give enough
time for tpm_get_timeouts() to further succeed.
This fix avoids long boot times in case another entity attempts
to send commands to the TPM when the TPM isn't accessible.
Signed-off-by: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/char/tpm/tpm.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
Index: linux-2.6.35.y/drivers/char/tpm/tpm.c
===================================================================
--- linux-2.6.35.y.orig/drivers/char/tpm/tpm.c 2011-03-29 22:51:41.259667437 -0700
+++ linux-2.6.35.y/drivers/char/tpm/tpm.c 2011-03-29 23:55:12.337151500 -0700
@@ -354,12 +354,14 @@
tpm_protected_ordinal_duration[ordinal &
TPM_PROTECTED_ORDINAL_MASK];
- if (duration_idx != TPM_UNDEFINED)
+ if (duration_idx != TPM_UNDEFINED) {
duration = chip->vendor.duration[duration_idx];
- if (duration <= 0)
+ /* if duration is 0, it's because chip->vendor.duration wasn't */
+ /* filled yet, so we set the lowest timeout just to give enough */
+ /* time for tpm_get_timeouts() to succeed */
+ return (duration <= 0 ? HZ : duration);
+ } else
return 2 * 60 * HZ;
- else
- return duration;
}
EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
From 9b29050f8f75916f974a2d231ae5d3cd59792296 Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.vnet.ibm.com>
Date: Tue, 11 Jan 2011 14:37:29 -0500
Subject: tpm_tis: Use timeouts returned from TPM
From: Stefan Berger <stefanb@linux.vnet.ibm.com>
commit 9b29050f8f75916f974a2d231ae5d3cd59792296 upstream.
The current TPM TIS driver in git discards the timeout values returned
from the TPM. The check of the response packet needs to consider that
the return_code field is 0 on success and the size of the expected
packet is equivalent to the header size + u32 length indicator for the
TPM_GetCapability() result + 3 timeout indicators of type u32.
I am also adding a sysfs entry 'timeouts' showing the timeouts that are
being used.
Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Guillaume Chazarain <guichaz@gmail.com>
Signed-off-by: Rajiv Andrade <srajiv@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/char/tpm/tpm.c | 18 ++++++++++++++++--
drivers/char/tpm/tpm.h | 2 ++
drivers/char/tpm/tpm_tis.c | 4 +++-
3 files changed, 21 insertions(+), 3 deletions(-)
Index: linux-2.6.35.y/drivers/char/tpm/tpm.c
===================================================================
--- linux-2.6.35.y.orig/drivers/char/tpm/tpm.c 2011-03-29 23:02:59.667308683 -0700
+++ linux-2.6.35.y/drivers/char/tpm/tpm.c 2011-03-29 23:02:59.685308223 -0700
@@ -567,9 +567,11 @@
if (rc)
return;
- if (be32_to_cpu(tpm_cmd.header.out.return_code)
- != 3 * sizeof(u32))
+ if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
+ be32_to_cpu(tpm_cmd.header.out.length)
+ != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
return;
+
duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
chip->vendor.duration[TPM_SHORT] =
usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
@@ -913,6 +915,18 @@
}
EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
+ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tpm_chip *chip = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d %d %d\n",
+ jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
+ jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
+ jiffies_to_usecs(chip->vendor.duration[TPM_LONG]));
+}
+EXPORT_SYMBOL_GPL(tpm_show_timeouts);
+
ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
Index: linux-2.6.35.y/drivers/char/tpm/tpm.h
===================================================================
--- linux-2.6.35.y.orig/drivers/char/tpm/tpm.h 2011-03-29 22:51:40.876677237 -0700
+++ linux-2.6.35.y/drivers/char/tpm/tpm.h 2011-03-29 23:02:59.685308223 -0700
@@ -56,6 +56,8 @@
char *);
extern ssize_t tpm_show_temp_deactivated(struct device *,
struct device_attribute *attr, char *);
+extern ssize_t tpm_show_timeouts(struct device *,
+ struct device_attribute *attr, char *);
struct tpm_chip;
Index: linux-2.6.35.y/drivers/char/tpm/tpm_tis.c
===================================================================
--- linux-2.6.35.y.orig/drivers/char/tpm/tpm_tis.c 2011-03-29 22:51:40.877677211 -0700
+++ linux-2.6.35.y/drivers/char/tpm/tpm_tis.c 2011-03-29 23:02:59.686308198 -0700
@@ -355,6 +355,7 @@
NULL);
static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
static struct attribute *tis_attrs[] = {
&dev_attr_pubek.attr,
@@ -364,7 +365,8 @@
&dev_attr_owned.attr,
&dev_attr_temp_deactivated.attr,
&dev_attr_caps.attr,
- &dev_attr_cancel.attr, NULL,
+ &dev_attr_cancel.attr,
+ &dev_attr_timeouts.attr, NULL,
};
static struct attribute_group tis_attr_grp = {
From c6c14330717f9850b4b4c054b81424b9979cd07d Mon Sep 17 00:00:00 2001
From: Jean-Francois Moine <moinejf@free.fr>
Date: Tue, 14 Dec 2010 16:15:37 -0300

View File

@ -1,390 +0,0 @@
From 9ced9810f0450a7f05eccb40dce4f9e4616c0fb6 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 24 Nov 2010 22:18:23 -0500
Subject: [PATCH 1/2] mm: page allocator: Adjust the per-cpu counter threshold when memory is low
Commit aa45484 ("calculate a better estimate of NR_FREE_PAGES when memory
is low") noted that watermarks were based on the vmstat NR_FREE_PAGES. To
avoid synchronization overhead, these counters are maintained on a per-cpu
basis and drained both periodically and when a threshold is above a
threshold. On large CPU systems, the difference between the estimate and
real value of NR_FREE_PAGES can be very high. The system can get into a
case where pages are allocated far below the min watermark potentially
causing livelock issues. The commit solved the problem by taking a better
reading of NR_FREE_PAGES when memory was low.
Unfortately, as reported by Shaohua Li this accurate reading can consume a
large amount of CPU time on systems with many sockets due to cache line
bouncing. This patch takes a different approach. For large machines
where counter drift might be unsafe and while kswapd is awake, the per-cpu
thresholds for the target pgdat are reduced to limit the level of drift to
what should be a safe level. This incurs a performance penalty in heavy
memory pressure by a factor that depends on the workload and the machine
but the machine should function correctly without accidentally exhausting
all memory on a node. There is an additional cost when kswapd wakes and
sleeps but the event is not expected to be frequent - in Shaohua's test
case, there was one recorded sleep and wake event at least.
To ensure that kswapd wakes up, a safe version of zone_watermark_ok() is
introduced that takes a more accurate reading of NR_FREE_PAGES when called
from wakeup_kswapd, when deciding whether it is really safe to go back to
sleep in sleeping_prematurely() and when deciding if a zone is really
balanced or not in balance_pgdat(). We are still using an expensive
function but limiting how often it is called.
When the test case is reproduced, the time spent in the watermark
functions is reduced. The following report is on the percentage of time
spent cumulatively spent in the functions zone_nr_free_pages(),
zone_watermark_ok(), __zone_watermark_ok(), zone_watermark_ok_safe(),
zone_page_state_snapshot(), zone_page_state().
vanilla 11.6615%
disable-threshold 0.2584%
Reported-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/mmzone.h | 10 ++-----
include/linux/vmstat.h | 5 +++
mm/mmzone.c | 21 ---------------
mm/page_alloc.c | 35 +++++++++++++++++++-----
mm/vmscan.c | 26 ++++++++++--------
mm/vmstat.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++-
6 files changed, 117 insertions(+), 48 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8b2db3d..1e3d0b4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -463,12 +463,6 @@ static inline int zone_is_oom_locked(const struct zone *zone)
return test_bit(ZONE_OOM_LOCKED, &zone->flags);
}
-#ifdef CONFIG_SMP
-unsigned long zone_nr_free_pages(struct zone *zone);
-#else
-#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES)
-#endif /* CONFIG_SMP */
-
/*
* The "priority" of VM scanning is how much of the queues we will scan in one
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -668,7 +662,9 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive,
unsigned long *free);
void build_all_zonelists(void *data);
void wakeup_kswapd(struct zone *zone, int order);
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags);
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags);
enum memmap_context {
MEMMAP_EARLY,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index eaaea37..e4cc21c 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -254,6 +254,8 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
void refresh_cpu_vm_stats(int);
+void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
+void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
#else /* CONFIG_SMP */
/*
@@ -298,6 +300,9 @@ static inline void __dec_zone_page_state(struct page *page,
#define dec_zone_page_state __dec_zone_page_state
#define mod_zone_page_state __mod_zone_page_state
+static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
+
static inline void refresh_cpu_vm_stats(int cpu) { }
#endif
diff --git a/mm/mmzone.c b/mm/mmzone.c
index e35bfb8..f5b7d17 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -87,24 +87,3 @@ int memmap_valid_within(unsigned long pfn,
return 1;
}
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
-
-#ifdef CONFIG_SMP
-/* Called when a more accurate view of NR_FREE_PAGES is needed */
-unsigned long zone_nr_free_pages(struct zone *zone)
-{
- unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES);
-
- /*
- * While kswapd is awake, it is considered the zone is under some
- * memory pressure. Under pressure, there is a risk that
- * per-cpu-counter-drift will allow the min watermark to be breached
- * potentially causing a live-lock. While kswapd is awake and
- * free pages are low, get a better estimate for free pages
- */
- if (nr_free_pages < zone->percpu_drift_mark &&
- !waitqueue_active(&zone->zone_pgdat->kswapd_wait))
- return zone_page_state_snapshot(zone, NR_FREE_PAGES);
-
- return nr_free_pages;
-}
-#endif /* CONFIG_SMP */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f7cc624..cf5d4c0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1454,24 +1454,24 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
#endif /* CONFIG_FAIL_PAGE_ALLOC */
/*
- * Return 1 if free pages are above 'mark'. This takes into account the order
+ * Return true if free pages are above 'mark'. This takes into account the order
* of the allocation.
*/
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int alloc_flags)
+static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags, long free_pages)
{
/* free_pages my go negative - that's OK */
long min = mark;
- long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
int o;
+ free_pages -= (1 << order) + 1;
if (alloc_flags & ALLOC_HIGH)
min -= min / 2;
if (alloc_flags & ALLOC_HARDER)
min -= min / 4;
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
- return 0;
+ return false;
for (o = 0; o < order; o++) {
/* At the next order, this order's pages become unavailable */
free_pages -= z->free_area[o].nr_free << o;
@@ -1480,9 +1480,28 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
min >>= 1;
if (free_pages <= min)
- return 0;
+ return false;
}
- return 1;
+ return true;
+}
+
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags)
+{
+ return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+ zone_page_state(z, NR_FREE_PAGES));
+}
+
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags)
+{
+ long free_pages = zone_page_state(z, NR_FREE_PAGES);
+
+ if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
+ free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
+
+ return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+ free_pages);
}
#ifdef CONFIG_NUMA
@@ -2425,7 +2444,7 @@ void show_free_areas(void)
" all_unreclaimable? %s"
"\n",
zone->name,
- K(zone_nr_free_pages(zone)),
+ K(zone_page_state(zone, NR_FREE_PAGES)),
K(min_wmark_pages(zone)),
K(low_wmark_pages(zone)),
K(high_wmark_pages(zone)),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9753626..18f4038 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2007,7 +2007,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
if (zone->all_unreclaimable)
continue;
- if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
+ if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
0, 0))
return 1;
}
@@ -2104,7 +2104,7 @@ loop_again:
shrink_active_list(SWAP_CLUSTER_MAX, zone,
&sc, priority, 0);
- if (!zone_watermark_ok(zone, order,
+ if (!zone_watermark_ok_safe(zone, order,
high_wmark_pages(zone), 0, 0)) {
end_zone = i;
break;
@@ -2155,7 +2155,7 @@ loop_again:
* We put equal pressure on every zone, unless one
* zone has way too many pages free already.
*/
- if (!zone_watermark_ok(zone, order,
+ if (!zone_watermark_ok_safe(zone, order,
8*high_wmark_pages(zone), end_zone, 0))
shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
@@ -2176,7 +2176,7 @@ loop_again:
total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
sc.may_writepage = 1;
- if (!zone_watermark_ok(zone, order,
+ if (!zone_watermark_ok_safe(zone, order,
high_wmark_pages(zone), end_zone, 0)) {
all_zones_ok = 0;
/*
@@ -2184,7 +2184,7 @@ loop_again:
* means that we have a GFP_ATOMIC allocation
* failure risk. Hurry up!
*/
- if (!zone_watermark_ok(zone, order,
+ if (!zone_watermark_ok_safe(zone, order,
min_wmark_pages(zone), end_zone, 0))
has_under_min_watermark_zone = 1;
}
@@ -2326,9 +2326,11 @@ static int kswapd(void *p)
* premature sleep. If not, then go fully
* to sleep until explicitly woken up
*/
- if (!sleeping_prematurely(pgdat, order, remaining))
+ if (!sleeping_prematurely(pgdat, order, remaining)) {
+ restore_pgdat_percpu_threshold(pgdat);
schedule();
- else {
+ reduce_pgdat_percpu_threshold(pgdat);
+ } else {
if (remaining)
count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
else
@@ -2364,15 +2366,16 @@ void wakeup_kswapd(struct zone *zone, int order)
if (!populated_zone(zone))
return;
- pgdat = zone->zone_pgdat;
- if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
+ if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
return;
+ pgdat = zone->zone_pgdat;
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;
- if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
- return;
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
+ if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
+ return;
+
wake_up_interruptible(&pgdat->kswapd_wait);
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 26d5716..41dc8cd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -81,6 +81,30 @@ EXPORT_SYMBOL(vm_stat);
#ifdef CONFIG_SMP
+static int calculate_pressure_threshold(struct zone *zone)
+{
+ int threshold;
+ int watermark_distance;
+
+ /*
+ * As vmstats are not up to date, there is drift between the estimated
+ * and real values. For high thresholds and a high number of CPUs, it
+ * is possible for the min watermark to be breached while the estimated
+ * value looks fine. The pressure threshold is a reduced value such
+ * that even the maximum amount of drift will not accidentally breach
+ * the min watermark
+ */
+ watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
+ threshold = max(1, (int)(watermark_distance / num_online_cpus()));
+
+ /*
+ * Maximum threshold is 125
+ */
+ threshold = min(125, threshold);
+
+ return threshold;
+}
+
static int calculate_threshold(struct zone *zone)
{
int threshold;
@@ -159,6 +183,48 @@ static void refresh_zone_stat_thresholds(void)
}
}
+void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
+{
+ struct zone *zone;
+ int cpu;
+ int threshold;
+ int i;
+
+ get_online_cpus();
+ for (i = 0; i < pgdat->nr_zones; i++) {
+ zone = &pgdat->node_zones[i];
+ if (!zone->percpu_drift_mark)
+ continue;
+
+ threshold = calculate_pressure_threshold(zone);
+ for_each_online_cpu(cpu)
+ per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+ = threshold;
+ }
+ put_online_cpus();
+}
+
+void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
+{
+ struct zone *zone;
+ int cpu;
+ int threshold;
+ int i;
+
+ get_online_cpus();
+ for (i = 0; i < pgdat->nr_zones; i++) {
+ zone = &pgdat->node_zones[i];
+ if (!zone->percpu_drift_mark)
+ continue;
+
+ threshold = calculate_threshold(zone);
+ for_each_online_cpu(cpu)
+ per_cpu_ptr(zone->pageset, cpu)->stat_threshold
+ = threshold;
+ }
+ put_online_cpus();
+}
+
/*
* For use when we know that interrupts are disabled.
*/
@@ -826,7 +892,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n scanned %lu"
"\n spanned %lu"
"\n present %lu",
- zone_nr_free_pages(zone),
+ zone_page_state(zone, NR_FREE_PAGES),
min_wmark_pages(zone),
low_wmark_pages(zone),
high_wmark_pages(zone),
--
1.7.3.2

View File

@ -1,29 +0,0 @@
From 38f1f0db010ac5b981ae06f1fe2fd64095ebb171 Mon Sep 17 00:00:00 2001
From: Nelson Elhage <nelhage@ksplice.com>
Date: Wed, 3 Nov 2010 16:35:40 +0000
Subject: [PATCH] netlink: Make nlmsg_find_attr take a const nlmsghdr*.
This will let us use it on a nlmsghdr stored inside a netlink_callback.
Signed-off-by: Nelson Elhage <nelhage@ksplice.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/netlink.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/include/net/netlink.h b/include/net/netlink.h
index a63b219..c344646 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -384,7 +384,7 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
*
* Returns the first attribute which matches the specified type.
*/
-static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh,
+static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh,
int hdrlen, int attrtype)
{
return nla_find(nlmsg_attrdata(nlh, hdrlen),
--
1.7.3.2

View File

@ -1,2 +1,2 @@
091abeb4684ce03d1d936851618687b6 linux-2.6.35.tar.bz2
c835a799c89c296b332077a5390fcc68 patch-2.6.35.12.bz2
017e8564c850d950b91bb4ce94974c07 patch-2.6.35.13.bz2