Merge branch 'f13/master' of ssh://pkgs.fedoraproject.org/kernel into f13

This commit is contained in:
Jarod Wilson 2010-09-05 12:41:06 -04:00
commit c08b49e452
4 changed files with 365 additions and 0 deletions

View File

@ -785,6 +785,10 @@ Patch12521: nfs-fix-an-oops-in-the-nfsv4-atomic-open-code.patch
Patch12522: keys-fix-bug-in-keyctl-session-to-parent-if-parent-has-no-session-keyring.patch
Patch12523: keys-fix-rcu-no-lock-warning-in-keyctl-session-to-parent.patch
Patch12530: pci-msi-remove-unsafe-and-unnecessary-hardware-access.patch
Patch12531: pci-msi-restore-read_msi_msg_desc-add-get_cached_msi_msg_desc.patch
Patch12532: x86-tsc-sched-recompute-cyc2ns_offset-s-during-resume-from-sleep-states.patch
%endif
BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
@ -1480,6 +1484,13 @@ ApplyPatch nfs-fix-an-oops-in-the-nfsv4-atomic-open-code.patch
ApplyPatch keys-fix-bug-in-keyctl-session-to-parent-if-parent-has-no-session-keyring.patch
ApplyPatch keys-fix-rcu-no-lock-warning-in-keyctl-session-to-parent.patch
# more suspend/resume fixes form 2.6.32 / 2.6.35 queue
# Fix unsafe access to MSI registers during suspend
ApplyPatch pci-msi-remove-unsafe-and-unnecessary-hardware-access.patch
ApplyPatch pci-msi-restore-read_msi_msg_desc-add-get_cached_msi_msg_desc.patch
# Fix scheduler load balancing after suspend/resume cycle
ApplyPatch x86-tsc-sched-recompute-cyc2ns_offset-s-during-resume-from-sleep-states.patch
# END OF PATCH APPLICATIONS
%endif
@ -2106,6 +2117,11 @@ fi
* Sat Sep 04 2010 Chuck Ebbert <cebbert@redhat.com> 2.6.34.6-53
- Disable asynchronous suspend, a new feature in 2.6.34
- Fix unsafe access to MSI registers during suspend
(pci-msi-remove-unsafe-and-unnecessary-hardware-access.patch,
pci-msi-restore-read_msi_msg_desc-add-get_cached_msi_msg_desc.patch)
- x86-tsc-sched-recompute-cyc2ns_offset-s-during-resume-from-sleep-states.patch
Fix load balancing after suspend/resume cycle (taken from 2.6.32 stable queue)
* Fri Sep 03 2010 Chuck Ebbert <cebbert@redhat.com> 2.6.34.6-52
- acpi-ec-pm-fix-race-between-ec-transactions-and-system-suspend.patch:

View File

@ -0,0 +1,86 @@
From fcd097f31a6ee207cc0c3da9cccd2a86d4334785 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 17 Jun 2010 20:16:36 +0100
Subject: PCI: MSI: Remove unsafe and unnecessary hardware access
From: Ben Hutchings <bhutchings@solarflare.com>
commit fcd097f31a6ee207cc0c3da9cccd2a86d4334785 upstream.
During suspend on an SMP system, {read,write}_msi_msg_desc() may be
called to mask and unmask interrupts on a device that is already in a
reduced power state. At this point memory-mapped registers including
MSI-X tables are not accessible, and config space may not be fully
functional either.
While a device is in a reduced power state its interrupts are
effectively masked and its MSI(-X) state will be restored when it is
brought back to D0. Therefore these functions can simply read and
write msi_desc::msg for devices not in D0.
Further, read_msi_msg_desc() should only ever be used to update a
previously written message, so it can always read msi_desc::msg
and never needs to touch the hardware.
Tested-by: "Michael Chan" <mchan@broadcom.com>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
drivers/pci/msi.c | 36 ++++++++++++------------------------
1 file changed, 12 insertions(+), 24 deletions(-)
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -195,30 +195,15 @@ void unmask_msi_irq(unsigned int irq)
void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
struct msi_desc *entry = get_irq_desc_msi(desc);
- if (entry->msi_attrib.is_msix) {
- void __iomem *base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
- msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
- msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
- msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
- } else {
- struct pci_dev *dev = entry->dev;
- int pos = entry->msi_attrib.pos;
- u16 data;
-
- pci_read_config_dword(dev, msi_lower_address_reg(pos),
- &msg->address_lo);
- if (entry->msi_attrib.is_64) {
- pci_read_config_dword(dev, msi_upper_address_reg(pos),
- &msg->address_hi);
- pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
- } else {
- msg->address_hi = 0;
- pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
- }
- msg->data = data;
- }
+ /* We do not touch the hardware (which may not even be
+ * accessible at the moment) but return the last message
+ * written. Assert that this is valid, assuming that
+ * valid messages are not all-zeroes. */
+ BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
+ entry->msg.data));
+
+ *msg = entry->msg;
}
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
@@ -231,7 +216,10 @@ void read_msi_msg(unsigned int irq, stru
void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
struct msi_desc *entry = get_irq_desc_msi(desc);
- if (entry->msi_attrib.is_msix) {
+
+ if (entry->dev->current_state != PCI_D0) {
+ /* Don't touch the hardware now */
+ } else if (entry->msi_attrib.is_msix) {
void __iomem *base;
base = entry->mask_base +
entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

View File

@ -0,0 +1,148 @@
From 30da55242818a8ca08583188ebcbaccd283ad4d9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 23 Jul 2010 14:56:28 +0100
Subject: PCI: MSI: Restore read_msi_msg_desc(); add get_cached_msi_msg_desc()
From: Ben Hutchings <bhutchings@solarflare.com>
commit 30da55242818a8ca08583188ebcbaccd283ad4d9 upstream.
commit 2ca1af9aa3285c6a5f103ed31ad09f7399fc65d7 "PCI: MSI: Remove
unsafe and unnecessary hardware access" changed read_msi_msg_desc() to
return the last MSI message written instead of reading it from the
device, since it may be called while the device is in a reduced
power state.
However, the pSeries platform code really does need to read messages
from the device, since they are initially written by firmware.
Therefore:
- Restore the previous behaviour of read_msi_msg_desc()
- Add new functions get_cached_msi_msg{,_desc}() which return the
last MSI message written
- Use the new functions where appropriate
Acked-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
arch/ia64/kernel/msi_ia64.c | 2 -
arch/ia64/sn/kernel/msi_sn.c | 2 -
arch/x86/kernel/apic/io_apic.c | 2 -
drivers/pci/msi.c | 47 ++++++++++++++++++++++++++++++++++++-----
include/linux/msi.h | 2 +
5 files changed, 47 insertions(+), 8 deletions(-)
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -25,7 +25,7 @@ static int ia64_set_msi_irq_affinity(uns
if (irq_prepare_move(irq, cpu))
return -1;
- read_msi_msg(irq, &msg);
+ get_cached_msi_msg(irq, &msg);
addr = msg.address_lo;
addr &= MSI_ADDR_DEST_ID_MASK;
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -174,7 +174,7 @@ static int sn_set_msi_irq_affinity(unsig
* Release XIO resources for the old MSI PCI address
*/
- read_msi_msg(irq, &msg);
+ get_cached_msi_msg(irq, &msg);
sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
pdev = sn_pdev->pdi_linux_pcidev;
provider = SN_PCIDEV_BUSPROVIDER(pdev);
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3338,7 +3338,7 @@ static int set_msi_irq_affinity(unsigned
cfg = desc->chip_data;
- read_msi_msg_desc(desc, &msg);
+ get_cached_msi_msg_desc(desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -196,9 +196,46 @@ void read_msi_msg_desc(struct irq_desc *
{
struct msi_desc *entry = get_irq_desc_msi(desc);
- /* We do not touch the hardware (which may not even be
- * accessible at the moment) but return the last message
- * written. Assert that this is valid, assuming that
+ BUG_ON(entry->dev->current_state != PCI_D0);
+
+ if (entry->msi_attrib.is_msix) {
+ void __iomem *base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+ msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
+ msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
+ msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
+ } else {
+ struct pci_dev *dev = entry->dev;
+ int pos = entry->msi_attrib.pos;
+ u16 data;
+
+ pci_read_config_dword(dev, msi_lower_address_reg(pos),
+ &msg->address_lo);
+ if (entry->msi_attrib.is_64) {
+ pci_read_config_dword(dev, msi_upper_address_reg(pos),
+ &msg->address_hi);
+ pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
+ } else {
+ msg->address_hi = 0;
+ pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
+ }
+ msg->data = data;
+ }
+}
+
+void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ read_msi_msg_desc(desc, msg);
+}
+
+void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+{
+ struct msi_desc *entry = get_irq_desc_msi(desc);
+
+ /* Assert that the cache is valid, assuming that
* valid messages are not all-zeroes. */
BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
entry->msg.data));
@@ -206,11 +243,11 @@ void read_msi_msg_desc(struct irq_desc *
*msg = entry->msg;
}
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
struct irq_desc *desc = irq_to_desc(irq);
- read_msi_msg_desc(desc, msg);
+ get_cached_msi_msg_desc(desc, msg);
}
void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -14,8 +14,10 @@ struct irq_desc;
extern void mask_msi_irq(unsigned int irq);
extern void unmask_msi_irq(unsigned int irq);
extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
+extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
struct msi_desc {

View File

@ -0,0 +1,115 @@
From cd7240c0b900eb6d690ccee088a6c9b46dae815a Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 19 Aug 2010 17:03:38 -0700
Subject: x86, tsc, sched: Recompute cyc2ns_offset's during resume from sleep states
From: Suresh Siddha <suresh.b.siddha@intel.com>
commit cd7240c0b900eb6d690ccee088a6c9b46dae815a upstream.
TSC's get reset after suspend/resume (even on cpu's with invariant TSC
which runs at a constant rate across ACPI P-, C- and T-states). And in
some systems BIOS seem to reinit TSC to arbitrary large value (still
sync'd across cpu's) during resume.
This leads to a scenario of scheduler rq->clock (sched_clock_cpu()) less
than rq->age_stamp (introduced in 2.6.32). This leads to a big value
returned by scale_rt_power() and the resulting big group power set by the
update_group_power() is causing improper load balancing between busy and
idle cpu's after suspend/resume.
This resulted in multi-threaded workloads (like kernel-compilation) go
slower after suspend/resume cycle on core i5 laptops.
Fix this by recomputing cyc2ns_offset's during resume, so that
sched_clock() continues from the point where it was left off during
suspend.
Reported-by: Florian Pritz <flo@xssn.at>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1282262618.2675.24.camel@sbsiddha-MOBL3.sc.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
arch/x86/include/asm/tsc.h | 2 ++
arch/x86/kernel/tsc.c | 38 ++++++++++++++++++++++++++++++++++++++
arch/x86/power/cpu.c | 2 ++
3 files changed, 42 insertions(+)
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -59,5 +59,7 @@ extern void check_tsc_sync_source(int cp
extern void check_tsc_sync_target(void);
extern int notsc_setup(char *);
+extern void save_sched_clock_state(void);
+extern void restore_sched_clock_state(void);
#endif /* _ASM_X86_TSC_H */
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned lo
local_irq_restore(flags);
}
+static unsigned long long cyc2ns_suspend;
+
+void save_sched_clock_state(void)
+{
+ if (!sched_clock_stable)
+ return;
+
+ cyc2ns_suspend = sched_clock();
+}
+
+/*
+ * Even on processors with invariant TSC, TSC gets reset in some the
+ * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
+ * arbitrary value (still sync'd across cpu's) during resume from such sleep
+ * states. To cope up with this, recompute the cyc2ns_offset for each cpu so
+ * that sched_clock() continues from the point where it was left off during
+ * suspend.
+ */
+void restore_sched_clock_state(void)
+{
+ unsigned long long offset;
+ unsigned long flags;
+ int cpu;
+
+ if (!sched_clock_stable)
+ return;
+
+ local_irq_save(flags);
+
+ get_cpu_var(cyc2ns_offset) = 0;
+ offset = cyc2ns_suspend - sched_clock();
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cyc2ns_offset, cpu) = offset;
+
+ local_irq_restore(flags);
+}
+
#ifdef CONFIG_CPU_FREQ
/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -112,6 +112,7 @@ static void __save_processor_state(struc
void save_processor_state(void)
{
__save_processor_state(&saved_context);
+ save_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(save_processor_state);
@@ -253,6 +254,7 @@ static void __restore_processor_state(st
void restore_processor_state(void)
{
__restore_processor_state(&saved_context);
+ restore_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(restore_processor_state);