3.0-git19 snapshot

Add epoll patch until it gets upstreamed
Add trial patch to fix the "scheduling while atomic" issues around pidmap_init
This commit is contained in:
Josh Boyer 2011-08-04 09:55:54 -04:00
parent 1aa2339b01
commit 8e3ae989b3
7 changed files with 196 additions and 34 deletions

View File

@ -153,7 +153,8 @@ CONFIG_ACPI_PROCESSOR_AGGREGATOR=m
CONFIG_ACPI_HED=m
CONFIG_ACPI_APEI=y
CONFIG_ACPI_APEI_PCIEAER=y
CONFIG_ACPI_APEI_GHES=m
CONFIG_ACPI_APEI_GHES=y
CONFIG_ACPI_APEI_MEMORY_FAILURE=y
# CONFIG_ACPI_APEI_EINJ is not set
CONFIG_ACPI_IPMI=m
CONFIG_ACPI_CUSTOM_METHOD=m

View File

@ -87,7 +87,8 @@ CONFIG_ACPI_PROCESSOR_AGGREGATOR=m
CONFIG_ACPI_HED=m
CONFIG_ACPI_APEI=y
CONFIG_ACPI_APEI_PCIEAER=y
CONFIG_ACPI_APEI_GHES=m
CONFIG_ACPI_APEI_GHES=y
CONFIG_ACPI_APEI_MEMORY_FAILURE=y
# CONFIG_ACPI_APEI_EINJ is not set
CONFIG_ACPI_IPMI=m
CONFIG_ACPI_CUSTOM_METHOD=m

View File

@ -0,0 +1,116 @@
epoll can acquire multiple ep->mtx on multiple "struct eventpoll"s
at once in the case where one epoll fd is monitoring another epoll
fd. This is perfectly OK, since we're careful about the lock ordering,
but causes spurious lockdep warnings. Annotate the recursion using
mutex_lock_nested, and add a comment explaining the nesting rules for
good measure.
Reported-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Nelson Elhage <nelhage@nelhage.com>
---
I've tested this on a synthetic epoll test case that just adds e1 to
e2 and then does an epoll_wait(). I verified that it caused lockdep
problems on 3.0 and that this patch fixed it, but I haven't done more
extensive testing. Paul, are you able to test systemd against this?
fs/eventpoll.c | 25 ++++++++++++++++++-------
1 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f9cfd16..0cb7bc6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -76,6 +76,15 @@
* Events that require holding "epmutex" are very rare, while for
* normal operations the epoll private "ep->mtx" will guarantee
* a better scalability.
+ * It is possible to acquire multiple "ep->mtx"es at once in the case
+ * when one epoll fd is added to another. In this case, we always
+ * acquire the locks in the order of nesting (i.e. after epoll_ctl(e1,
+ * EPOLL_CTL_ADD, e2), e1->mtx will always be acquired before
+ * e2->mtx). Since we disallow cycles of epoll file descriptors, this
+ * ensures that the mutexes are well-ordered. In order to communicate
+ * this nesting to lockdep, when walking a tree of epoll file
+ * descriptors, we use the current recursion depth as the lockdep
+ * subkey.
*/
/* Epoll private bits inside the event mask */
@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
* @ep: Pointer to the epoll private data structure.
* @sproc: Pointer to the scan callback.
* @priv: Private opaque data passed to the @sproc callback.
+ * @depth: The current depth of recursive f_op->poll calls.
*
* Returns: The same integer error code returned by the @sproc callback.
*/
static int ep_scan_ready_list(struct eventpoll *ep,
int (*sproc)(struct eventpoll *,
struct list_head *, void *),
- void *priv)
+ void *priv,
+ int depth)
{
int error, pwake = 0;
unsigned long flags;
@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
*/
- mutex_lock(&ep->mtx);
+ mutex_lock_nested(&ep->mtx, depth);
/*
* Steal the ready list, and re-init the original one to the
@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
{
- return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
+ return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
}
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file)
ep = epi->ep;
list_del_init(&epi->fllink);
- mutex_lock(&ep->mtx);
+ mutex_lock_nested(&ep->mtx, 0);
ep_remove(ep, epi);
mutex_unlock(&ep->mtx);
}
@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep,
esed.maxevents = maxevents;
esed.events = events;
- return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
+ return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
}
static inline struct timespec ep_set_mstimeout(long ms)
@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
struct rb_node *rbp;
struct epitem *epi;
- mutex_lock(&ep->mtx);
+ mutex_lock_nested(&ep->mtx, call_nests + 1);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
}
- mutex_lock(&ep->mtx);
+ mutex_lock_nested(&ep->mtx, 0);
/*
* Try to lookup the file inside our RB tree, Since we grabbed "mtx"
--
1.7.4.1
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html

View File

@ -84,7 +84,7 @@ Summary: The Linux kernel
# The rc snapshot level
%define rcrev 0
# The git snapshot level
%define gitrev 17
%define gitrev 19
# Set rpm version accordingly
%define rpmversion 3.%{upstream_sublevel}.0
%endif
@ -683,6 +683,8 @@ Patch12021: udlfb-bind-framebuffer-to-interface.patch
Patch12022: fix-cdc-ncm-dma-stack-vars.patch
Patch12023: ums-realtek-driver-uses-stack-memory-for-DMA.patch
Patch12024: epoll-fix-spurious-lockdep-warnings.patch
Patch12025: rcu-prevent-early-schedule.patch
# Runtime power management
Patch12203: linux-2.6-usb-pci-autosuspend.patch
@ -1260,6 +1262,8 @@ ApplyPatch neuter_intel_microcode_load.patch
ApplyPatch udlfb-bind-framebuffer-to-interface.patch
ApplyPatch fix-cdc-ncm-dma-stack-vars.patch
ApplyPatch ums-realtek-driver-uses-stack-memory-for-DMA.patch
ApplyPatch epoll-fix-spurious-lockdep-warnings.patch
ApplyPatch rcu-prevent-early-schedule.patch
# Runtime PM
#ApplyPatch linux-2.6-usb-pci-autosuspend.patch
@ -1891,6 +1895,11 @@ fi
# ||----w |
# || ||
%changelog
* Thu Aug 04 2011 Josh Boyer <jwboyer@redhat.com>
- Linux 3.0-git19
- Add patch to fix epoll backtrace (rhbz 722472)
- Add trial patch to fix rhbz 726877
* Wed Aug 03 2011 Dave Jones <davej@redhat.com>
- Re-apply the rebased utrace

View File

@ -1,17 +1,17 @@
From df42d15cd28f468ecd4c30465b98a53cce90617c Mon Sep 17 00:00:00 2001
From f72d640713d01b3b704c6e84ab49b62f19fc9c22 Mon Sep 17 00:00:00 2001
From: Kyle McMartin <kyle@phobos.i.jkkm.org>
Date: Tue, 30 Mar 2010 00:16:25 -0400
Subject: dev-crash-driver.patch
Subject: [PATCH] dev-crash-driver.patch
---
arch/ia64/include/asm/crash.h | 90 +++++++++++++++++++++++++++++
arch/ia64/kernel/ia64_ksyms.c | 3 +
arch/x86/include/asm/crash.h | 75 ++++++++++++++++++++++++
arch/x86/mm/ioremap.c | 2 +
drivers/char/Kconfig | 2 +
drivers/char/Kconfig | 3 +
drivers/char/Makefile | 2 +
drivers/char/crash.c | 128 +++++++++++++++++++++++++++++++++++++++++
7 files changed, 302 insertions(+), 0 deletions(-)
7 files changed, 303 insertions(+), 0 deletions(-)
create mode 100644 arch/ia64/include/asm/crash.h
create mode 100644 arch/x86/include/asm/crash.h
create mode 100644 drivers/char/crash.c
@ -208,7 +208,7 @@ index 0000000..dfcc006
+
+#endif /* _ASM_I386_CRASH_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 5eb1ba7..3e525d2 100644
index be1ef57..ac659f7 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,6 +24,8 @@
@ -220,6 +220,30 @@ index 5eb1ba7..3e525d2 100644
/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 423fd56..e04a561 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -4,6 +4,9 @@
menu "Character devices"
+config CRASH
+ tristate "Crash Utility memory driver"
+
source "drivers/tty/Kconfig"
config DEVKMEM
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 32762ba..3d5d525 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -65,3 +65,5 @@ obj-$(CONFIG_JS_RTC) += js-rtc.o
js-rtc-y = rtc.o
obj-$(CONFIG_TILE_SROM) += tile-srom.o
+
+obj-$(CONFIG_CRASH) += crash.o
diff --git a/drivers/char/crash.c b/drivers/char/crash.c
new file mode 100644
index 0000000..e5437de
@ -354,29 +378,6 @@ index 0000000..e5437de
+module_exit(crash_cleanup_module);
+
+MODULE_LICENSE("GPL");
--
1.7.6
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index ba53ec9..6588b33 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -98,3 +98,5 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o
obj-$(CONFIG_JS_RTC) += js-rtc.o
js-rtc-y = rtc.o
+
+obj-$(CONFIG_CRASH) += crash.o
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 04f8b2d..e8fb997 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -4,6 +4,9 @@
menu "Character devices"
+config CRASH
+ tristate "Crash Utility memory driver"
+
source "drivers/tty/Kconfig"
config DEVKMEM

View File

@ -0,0 +1,34 @@
From 05b0c3b7b05471a6d53300cf6d7d88ee66eff7ed Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 4 Aug 2011 11:10:24 -0400
Subject: [PATCH] rcu: Prevent early boot set_need_resched() from
__rcu_pending()
There isn't a whole lot of point in poking the scheduler before there
are other tasks to switch to. This commit therefore adds a check
for rcu_scheduler_fully_active in __rcu_pending() to suppress any
pre-scheduler calls to set_need_resched(). The downside of this approach
is additional runtime overhead in a reasonably hot code path.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
kernel/rcutree.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ba06207..9c8a3f9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1671,7 +1671,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
check_cpu_stall(rsp, rdp);
/* Is the RCU core waiting for a quiescent state from this CPU? */
- if (rdp->qs_pending && !rdp->passed_quiesc) {
+ if (rcu_scheduler_fully_active && rdp->qs_pending && !rdp->passed_quiesc) {
/*
* If force_quiescent_state() coming soon and this CPU
--
1.7.6

View File

@ -1,2 +1,2 @@
398e95866794def22b12dfbc15ce89c0 linux-3.0.tar.bz2
038198e0406fd87c5dd9a1e6312bfb47 patch-3.0-git17.bz2
0ce6ed9536107e6ea7d49de92c0d580e patch-3.0-git19.bz2