Merge branch 'rawhide/user/kyle/kernel-git' into rawhide/user/myoung/xendom0
Conflicts: kernel.spec
This commit is contained in:
commit
ed706b65af
|
@ -3,5 +3,4 @@ patch-*.bz2
|
|||
clog
|
||||
*.rpm
|
||||
kernel-2.6.*/
|
||||
/patch-2.6.37-rc4.bz2
|
||||
/patch-2.6.37-rc4-git1.bz2
|
||||
/patch-2.6.37-rc5.bz2
|
||||
|
|
|
@ -3529,6 +3529,7 @@ CONFIG_CIFS_UPCALL=y
|
|||
CONFIG_CIFS_XATTR=y
|
||||
CONFIG_CIFS_POSIX=y
|
||||
CONFIG_CIFS_FSCACHE=y
|
||||
CONFIG_CIFS_ACL=y
|
||||
CONFIG_CIFS_WEAK_PW_HASH=y
|
||||
# CONFIG_CIFS_DEBUG2 is not set
|
||||
CONFIG_CIFS_DFS_UPCALL=y
|
||||
|
|
|
@ -438,3 +438,5 @@ CONFIG_PCH_PHUB=m
|
|||
CONFIG_VIDEO_VIA_CAMERA=m
|
||||
|
||||
CONFIG_JUMP_LABEL=y
|
||||
|
||||
CONFIG_HP_ILO=m
|
||||
|
|
1612
drm-fixes.patch
1612
drm-fixes.patch
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,44 @@
|
|||
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
|
||||
index f737960..b1f8164 100644
|
||||
--- a/drivers/gpu/drm/i915/i915_drv.c
|
||||
+++ b/drivers/gpu/drm/i915/i915_drv.c
|
||||
@@ -509,6 +509,8 @@ i915_pci_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct drm_device *dev = pci_get_drvdata(pdev);
|
||||
|
||||
+ pci_disable_device(pdev); /* core did previous enable */
|
||||
+
|
||||
drm_put_dev(dev);
|
||||
}
|
||||
|
||||
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
|
||||
index 300f64b..2e3db37 100644
|
||||
--- a/drivers/gpu/drm/i915/intel_dp.c
|
||||
+++ b/drivers/gpu/drm/i915/intel_dp.c
|
||||
@@ -795,7 +795,8 @@ static bool ironlake_edp_panel_on (struct intel_dp *intel_dp)
|
||||
{
|
||||
struct drm_device *dev = intel_dp->base.base.dev;
|
||||
struct drm_i915_private *dev_priv = dev->dev_private;
|
||||
- u32 pp, idle_on_mask = PP_ON | PP_SEQUENCE_STATE_ON_IDLE;
|
||||
+ u32 pp, idle_on = PP_ON | PP_SEQUENCE_STATE_ON_IDLE;
|
||||
+ u32 idle_on_mask = PP_ON | PP_SEQUENCE_STATE_MASK;
|
||||
|
||||
if (I915_READ(PCH_PP_STATUS) & PP_ON)
|
||||
return true;
|
||||
@@ -816,7 +817,7 @@ static bool ironlake_edp_panel_on (struct intel_dp *intel_dp)
|
||||
*/
|
||||
msleep(300);
|
||||
|
||||
- if (wait_for((I915_READ(PCH_PP_STATUS) & idle_on_mask) == idle_on_mask,
|
||||
+ if (wait_for((I915_READ(PCH_PP_STATUS) & idle_on_mask) == idle_on,
|
||||
5000))
|
||||
DRM_ERROR("panel on wait timed out: 0x%08x\n",
|
||||
I915_READ(PCH_PP_STATUS));
|
||||
@@ -922,6 +923,7 @@ static void intel_dp_prepare(struct drm_encoder *encoder)
|
||||
|
||||
if (is_edp(intel_dp)) {
|
||||
ironlake_edp_backlight_off(dev);
|
||||
+ ironlake_edp_panel_off(dev);
|
||||
ironlake_edp_panel_on(intel_dp);
|
||||
if (!is_pch_edp(intel_dp))
|
||||
ironlake_edp_pll_on(encoder);
|
47
kernel.spec
47
kernel.spec
|
@ -83,9 +83,9 @@ Summary: The Linux kernel
|
|||
# The next upstream release sublevel (base_sublevel+1)
|
||||
%define upstream_sublevel %(echo $((%{base_sublevel} + 1)))
|
||||
# The rc snapshot level
|
||||
%define rcrev 4
|
||||
%define rcrev 5
|
||||
# The git snapshot level
|
||||
%define gitrev 1
|
||||
%define gitrev 0
|
||||
# Set rpm version accordingly
|
||||
%define rpmversion 2.6.%{upstream_sublevel}
|
||||
%endif
|
||||
|
@ -650,13 +650,14 @@ Patch1555: fix_xen_guest_on_old_EC2.patch
|
|||
# DRM
|
||||
|
||||
# nouveau + drm fixes
|
||||
Patch1801: drm-fixes.patch
|
||||
Patch1810: drm-nouveau-updates.patch
|
||||
Patch1819: drm-intel-big-hammer.patch
|
||||
# intel drm is all merged upstream
|
||||
Patch1824: drm-intel-next.patch
|
||||
# make sure the lvds comes back on lid open
|
||||
Patch1825: drm-intel-make-lvds-work.patch
|
||||
Patch1826: drm-intel-edp-fixes.patch
|
||||
|
||||
Patch1900: linux-2.6-intel-iommu-igfx.patch
|
||||
|
||||
# linux1394 git patches
|
||||
|
@ -702,10 +703,10 @@ Patch12205: runtime_pm_fixups.patch
|
|||
|
||||
Patch12303: dmar-disable-when-ricoh-multifunction.patch
|
||||
|
||||
Patch12400: tty-dont-allow-reopen-when-ldisc-is-changing.patch
|
||||
Patch12401: debug-tty-print-dev-name.patch
|
||||
Patch12402: tty-ldisc-fix-open-flag-handling.patch
|
||||
Patch12403: tty-open-hangup-race-fixup.patch
|
||||
|
||||
Patch12410: mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch
|
||||
Patch12411: mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds.patch
|
||||
|
||||
# Xen patches
|
||||
# git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git branches
|
||||
|
@ -1254,7 +1255,6 @@ ApplyPatch linux-2.6-e1000-ich9-montevina.patch
|
|||
ApplyPatch fix_xen_guest_on_old_EC2.patch
|
||||
|
||||
# DRM core
|
||||
ApplyPatch drm-fixes.patch
|
||||
|
||||
# Nouveau DRM
|
||||
ApplyOptionalPatch drm-nouveau-updates.patch
|
||||
|
@ -1264,6 +1264,7 @@ ApplyOptionalPatch drm-intel-next.patch
|
|||
ApplyPatch drm-intel-big-hammer.patch
|
||||
ApplyPatch drm-intel-make-lvds-work.patch
|
||||
ApplyPatch linux-2.6-intel-iommu-igfx.patch
|
||||
ApplyPatch drm-intel-edp-fixes.patch
|
||||
|
||||
# linux1394 git patches
|
||||
#ApplyPatch linux-2.6-firewire-git-update.patch
|
||||
|
@ -1306,10 +1307,11 @@ ApplyPatch runtime_pm_fixups.patch
|
|||
ApplyPatch dmar-disable-when-ricoh-multifunction.patch
|
||||
|
||||
# rhbz#630464
|
||||
ApplyPatch tty-dont-allow-reopen-when-ldisc-is-changing.patch
|
||||
ApplyPatch debug-tty-print-dev-name.patch
|
||||
ApplyPatch tty-ldisc-fix-open-flag-handling.patch
|
||||
ApplyPatch tty-open-hangup-race-fixup.patch
|
||||
|
||||
# backport some fixes for kswapd from mmotm, rhbz#649694
|
||||
ApplyPatch mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch
|
||||
ApplyPatch mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds.patch
|
||||
|
||||
# Xen patches
|
||||
ApplyPatch xen.next-2.6.37.patch
|
||||
|
@ -1590,6 +1592,9 @@ BuildKernel() {
|
|||
mkdir -p $RPM_BUILD_ROOT/usr/src/kernels
|
||||
mv $RPM_BUILD_ROOT/lib/modules/$KernelVer/build $RPM_BUILD_ROOT/$DevelDir
|
||||
ln -sf ../../..$DevelDir $RPM_BUILD_ROOT/lib/modules/$KernelVer/build
|
||||
|
||||
# prune junk from kernel-devel
|
||||
find $RPM_BUILD_ROOT/usr/src/kernels -name ".*.cmd" -exec rm -f {} \;
|
||||
}
|
||||
|
||||
###
|
||||
|
@ -1624,7 +1629,7 @@ BuildKernel %make_target %kernel_image smp
|
|||
%endif
|
||||
|
||||
%global perf_make \
|
||||
make %{?_smp_mflags} -C tools/perf -s V=1 NO_DEMANGLE=1 prefix=%{_prefix}
|
||||
make %{?_smp_mflags} -C tools/perf -s V=1 HAVE_CPLUS_DEMANGLE=1 prefix=%{_prefix}
|
||||
%if %{with_perf}
|
||||
%{perf_make} all
|
||||
%{perf_make} man || %{doc_build_fail}
|
||||
|
@ -1708,8 +1713,6 @@ find $RPM_BUILD_ROOT/usr/include \
|
|||
\( -name .install -o -name .check -o \
|
||||
-name ..install.cmd -o -name ..check.cmd \) | xargs rm -f
|
||||
|
||||
find $RPM_BUILD_ROOT/usr/src/kernels -name ".*.cmd" -exec rm -f {} \;
|
||||
|
||||
# glibc provides scsi headers for itself, for now
|
||||
rm -rf $RPM_BUILD_ROOT/usr/include/scsi
|
||||
rm -f $RPM_BUILD_ROOT/usr/include/asm*/atomic.h
|
||||
|
@ -1929,6 +1932,24 @@ fi
|
|||
# || ||
|
||||
|
||||
%changelog
|
||||
* Tue Dec 07 2010 Kyle McMartin <kyle@redhat.com> 2.6.37-0.rc5.git0.1
|
||||
- Linux 2.6.37-rc5
|
||||
|
||||
* Sat Dec 04 2010 Kyle McMartin <kyle@redhat.com>
|
||||
- Enable C++ symbol demangling with perf by linking against libiberty.a,
|
||||
which is LGPL2.
|
||||
|
||||
* Fri Dec 03 2010 Kyle McMartin <kyle@redhat.com>
|
||||
- Linux 2.6.37-rc4-git3
|
||||
- Enable HP ILO on x86_64 (#571329)
|
||||
- Drop merged drm-fixes.patch, split out edp-fixes.
|
||||
- tty-dont-allow-reopen-when-ldisc-is-changing.patch: upstream.
|
||||
- tty-ldisc-fix-open-flag-handling.patch: upstream.
|
||||
- Enable CIFS_ACL.
|
||||
|
||||
* Thu Dec 02 2010 Kyle McMartin <kyle@redhat.com>
|
||||
- Grab some of Mel's fixes from -mmotm to hopefully sort out #649694.
|
||||
|
||||
* Thu Dec 02 2010 Michael Young <m.a.young@durham.ac.uk>
|
||||
- Update the xen/next-2.6.37 patch and rebuild for rc4-git1
|
||||
- xen-pcifront-fixes patch is now upstream
|
||||
|
|
|
@ -0,0 +1,389 @@
|
|||
From df43fae25437d7bc7dfff72599c1e825038b67cf Mon Sep 17 00:00:00 2001
|
||||
From: Mel Gorman <mel@csn.ul.ie>
|
||||
Date: Wed, 24 Nov 2010 22:18:23 -0500
|
||||
Subject: [PATCH 1/2] mm: page allocator: Adjust the per-cpu counter threshold when memory is low
|
||||
|
||||
Commit aa45484 ("calculate a better estimate of NR_FREE_PAGES when memory
|
||||
is low") noted that watermarks were based on the vmstat NR_FREE_PAGES. To
|
||||
avoid synchronization overhead, these counters are maintained on a per-cpu
|
||||
basis and drained both periodically and when a counter delta is above a
|
||||
threshold. On large CPU systems, the difference between the estimate and
|
||||
real value of NR_FREE_PAGES can be very high. The system can get into a
|
||||
case where pages are allocated far below the min watermark potentially
|
||||
causing livelock issues. The commit solved the problem by taking a better
|
||||
reading of NR_FREE_PAGES when memory was low.
|
||||
|
||||
Unfortunately, as reported by Shaohua Li, this accurate reading can consume a
|
||||
large amount of CPU time on systems with many sockets due to cache line
|
||||
bouncing. This patch takes a different approach. For large machines
|
||||
where counter drift might be unsafe and while kswapd is awake, the per-cpu
|
||||
thresholds for the target pgdat are reduced to limit the level of drift to
|
||||
what should be a safe level. This incurs a performance penalty in heavy
|
||||
memory pressure by a factor that depends on the workload and the machine
|
||||
but the machine should function correctly without accidentally exhausting
|
||||
all memory on a node. There is an additional cost when kswapd wakes and
|
||||
sleeps but the event is not expected to be frequent - in Shaohua's test
|
||||
case, there was at least one recorded sleep and wake event.
|
||||
|
||||
To ensure that kswapd wakes up, a safe version of zone_watermark_ok() is
|
||||
introduced that takes a more accurate reading of NR_FREE_PAGES when called
|
||||
from wakeup_kswapd, when deciding whether it is really safe to go back to
|
||||
sleep in sleeping_prematurely() and when deciding if a zone is really
|
||||
balanced or not in balance_pgdat(). We are still using an expensive
|
||||
function but limiting how often it is called.
|
||||
|
||||
When the test case is reproduced, the time spent in the watermark
|
||||
functions is reduced. The following report is on the percentage of time
|
||||
cumulatively spent in the functions zone_nr_free_pages(),
|
||||
zone_watermark_ok(), __zone_watermark_ok(), zone_watermark_ok_safe(),
|
||||
zone_page_state_snapshot(), zone_page_state().
|
||||
|
||||
vanilla 11.6615%
|
||||
disable-threshold 0.2584%
|
||||
|
||||
Reported-by: Shaohua Li <shaohua.li@intel.com>
|
||||
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
|
||||
Reviewed-by: Christoph Lameter <cl@linux.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
[[http://userweb.kernel.org/~akpm/mmotm/broken-out/mm-page-allocator-adjust-the-per-cpu-counter-threshold-when-memory-is-low.patch]]
|
||||
---
|
||||
include/linux/mmzone.h | 10 ++-----
|
||||
include/linux/vmstat.h | 5 +++
|
||||
mm/mmzone.c | 21 ---------------
|
||||
mm/page_alloc.c | 35 +++++++++++++++++++-----
|
||||
mm/vmscan.c | 23 +++++++++-------
|
||||
mm/vmstat.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++-
|
||||
6 files changed, 115 insertions(+), 47 deletions(-)
|
||||
|
||||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
|
||||
index 3984c4e..8d789d7 100644
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -448,12 +448,6 @@ static inline int zone_is_oom_locked(const struct zone *zone)
|
||||
return test_bit(ZONE_OOM_LOCKED, &zone->flags);
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_SMP
|
||||
-unsigned long zone_nr_free_pages(struct zone *zone);
|
||||
-#else
|
||||
-#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES)
|
||||
-#endif /* CONFIG_SMP */
|
||||
-
|
||||
/*
|
||||
* The "priority" of VM scanning is how much of the queues we will scan in one
|
||||
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
|
||||
@@ -651,7 +645,9 @@ typedef struct pglist_data {
|
||||
extern struct mutex zonelists_mutex;
|
||||
void build_all_zonelists(void *data);
|
||||
void wakeup_kswapd(struct zone *zone, int order);
|
||||
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
||||
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
||||
+ int classzone_idx, int alloc_flags);
|
||||
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
|
||||
int classzone_idx, int alloc_flags);
|
||||
enum memmap_context {
|
||||
MEMMAP_EARLY,
|
||||
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
|
||||
index eaaea37..e4cc21c 100644
|
||||
--- a/include/linux/vmstat.h
|
||||
+++ b/include/linux/vmstat.h
|
||||
@@ -254,6 +254,8 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
|
||||
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
|
||||
|
||||
void refresh_cpu_vm_stats(int);
|
||||
+void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
|
||||
+void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
@@ -298,6 +300,9 @@ static inline void __dec_zone_page_state(struct page *page,
|
||||
#define dec_zone_page_state __dec_zone_page_state
|
||||
#define mod_zone_page_state __mod_zone_page_state
|
||||
|
||||
+static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
|
||||
+static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
|
||||
+
|
||||
static inline void refresh_cpu_vm_stats(int cpu) { }
|
||||
#endif
|
||||
|
||||
diff --git a/mm/mmzone.c b/mm/mmzone.c
|
||||
index e35bfb8..f5b7d17 100644
|
||||
--- a/mm/mmzone.c
|
||||
+++ b/mm/mmzone.c
|
||||
@@ -87,24 +87,3 @@ int memmap_valid_within(unsigned long pfn,
|
||||
return 1;
|
||||
}
|
||||
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
|
||||
-
|
||||
-#ifdef CONFIG_SMP
|
||||
-/* Called when a more accurate view of NR_FREE_PAGES is needed */
|
||||
-unsigned long zone_nr_free_pages(struct zone *zone)
|
||||
-{
|
||||
- unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES);
|
||||
-
|
||||
- /*
|
||||
- * While kswapd is awake, it is considered the zone is under some
|
||||
- * memory pressure. Under pressure, there is a risk that
|
||||
- * per-cpu-counter-drift will allow the min watermark to be breached
|
||||
- * potentially causing a live-lock. While kswapd is awake and
|
||||
- * free pages are low, get a better estimate for free pages
|
||||
- */
|
||||
- if (nr_free_pages < zone->percpu_drift_mark &&
|
||||
- !waitqueue_active(&zone->zone_pgdat->kswapd_wait))
|
||||
- return zone_page_state_snapshot(zone, NR_FREE_PAGES);
|
||||
-
|
||||
- return nr_free_pages;
|
||||
-}
|
||||
-#endif /* CONFIG_SMP */
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index f12ad18..0286150 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -1454,24 +1454,24 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
|
||||
#endif /* CONFIG_FAIL_PAGE_ALLOC */
|
||||
|
||||
/*
|
||||
- * Return 1 if free pages are above 'mark'. This takes into account the order
|
||||
+ * Return true if free pages are above 'mark'. This takes into account the order
|
||||
* of the allocation.
|
||||
*/
|
||||
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
||||
- int classzone_idx, int alloc_flags)
|
||||
+static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
||||
+ int classzone_idx, int alloc_flags, long free_pages)
|
||||
{
|
||||
/* free_pages my go negative - that's OK */
|
||||
long min = mark;
|
||||
- long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
|
||||
int o;
|
||||
|
||||
+ free_pages -= (1 << order) + 1;
|
||||
if (alloc_flags & ALLOC_HIGH)
|
||||
min -= min / 2;
|
||||
if (alloc_flags & ALLOC_HARDER)
|
||||
min -= min / 4;
|
||||
|
||||
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
|
||||
- return 0;
|
||||
+ return false;
|
||||
for (o = 0; o < order; o++) {
|
||||
/* At the next order, this order's pages become unavailable */
|
||||
free_pages -= z->free_area[o].nr_free << o;
|
||||
@@ -1480,9 +1480,28 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
||||
min >>= 1;
|
||||
|
||||
if (free_pages <= min)
|
||||
- return 0;
|
||||
+ return false;
|
||||
}
|
||||
- return 1;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
|
||||
+ int classzone_idx, int alloc_flags)
|
||||
+{
|
||||
+ return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
|
||||
+ zone_page_state(z, NR_FREE_PAGES));
|
||||
+}
|
||||
+
|
||||
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
|
||||
+ int classzone_idx, int alloc_flags)
|
||||
+{
|
||||
+ long free_pages = zone_page_state(z, NR_FREE_PAGES);
|
||||
+
|
||||
+ if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
|
||||
+ free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
|
||||
+
|
||||
+ return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
|
||||
+ free_pages);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
@@ -2436,7 +2455,7 @@ void show_free_areas(void)
|
||||
" all_unreclaimable? %s"
|
||||
"\n",
|
||||
zone->name,
|
||||
- K(zone_nr_free_pages(zone)),
|
||||
+ K(zone_page_state(zone, NR_FREE_PAGES)),
|
||||
K(min_wmark_pages(zone)),
|
||||
K(low_wmark_pages(zone)),
|
||||
K(high_wmark_pages(zone)),
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index c5dfabf..3e71cb1 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -2082,7 +2082,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
|
||||
if (zone->all_unreclaimable)
|
||||
continue;
|
||||
|
||||
- if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
|
||||
+ if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
|
||||
0, 0))
|
||||
return 1;
|
||||
}
|
||||
@@ -2169,7 +2169,7 @@ loop_again:
|
||||
shrink_active_list(SWAP_CLUSTER_MAX, zone,
|
||||
&sc, priority, 0);
|
||||
|
||||
- if (!zone_watermark_ok(zone, order,
|
||||
+ if (!zone_watermark_ok_safe(zone, order,
|
||||
high_wmark_pages(zone), 0, 0)) {
|
||||
end_zone = i;
|
||||
break;
|
||||
@@ -2215,7 +2215,7 @@ loop_again:
|
||||
* We put equal pressure on every zone, unless one
|
||||
* zone has way too many pages free already.
|
||||
*/
|
||||
- if (!zone_watermark_ok(zone, order,
|
||||
+ if (!zone_watermark_ok_safe(zone, order,
|
||||
8*high_wmark_pages(zone), end_zone, 0))
|
||||
shrink_zone(priority, zone, &sc);
|
||||
reclaim_state->reclaimed_slab = 0;
|
||||
@@ -2236,7 +2236,7 @@ loop_again:
|
||||
total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
|
||||
sc.may_writepage = 1;
|
||||
|
||||
- if (!zone_watermark_ok(zone, order,
|
||||
+ if (!zone_watermark_ok_safe(zone, order,
|
||||
high_wmark_pages(zone), end_zone, 0)) {
|
||||
all_zones_ok = 0;
|
||||
/*
|
||||
@@ -2244,7 +2244,7 @@ loop_again:
|
||||
* means that we have a GFP_ATOMIC allocation
|
||||
* failure risk. Hurry up!
|
||||
*/
|
||||
- if (!zone_watermark_ok(zone, order,
|
||||
+ if (!zone_watermark_ok_safe(zone, order,
|
||||
min_wmark_pages(zone), end_zone, 0))
|
||||
has_under_min_watermark_zone = 1;
|
||||
}
|
||||
@@ -2378,7 +2378,9 @@ static int kswapd(void *p)
|
||||
*/
|
||||
if (!sleeping_prematurely(pgdat, order, remaining)) {
|
||||
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
|
||||
+ restore_pgdat_percpu_threshold(pgdat);
|
||||
schedule();
|
||||
+ reduce_pgdat_percpu_threshold(pgdat);
|
||||
} else {
|
||||
if (remaining)
|
||||
count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
|
||||
@@ -2417,16 +2419,17 @@ void wakeup_kswapd(struct zone *zone, int order)
|
||||
if (!populated_zone(zone))
|
||||
return;
|
||||
|
||||
- pgdat = zone->zone_pgdat;
|
||||
- if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
|
||||
+ if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
|
||||
return;
|
||||
+ pgdat = zone->zone_pgdat;
|
||||
if (pgdat->kswapd_max_order < order)
|
||||
pgdat->kswapd_max_order = order;
|
||||
- trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
|
||||
- if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
|
||||
- return;
|
||||
if (!waitqueue_active(&pgdat->kswapd_wait))
|
||||
return;
|
||||
+ if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
|
||||
+ return;
|
||||
+
|
||||
+ trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
|
||||
wake_up_interruptible(&pgdat->kswapd_wait);
|
||||
}
|
||||
|
||||
diff --git a/mm/vmstat.c b/mm/vmstat.c
|
||||
index 355a9e6..4d7faeb 100644
|
||||
--- a/mm/vmstat.c
|
||||
+++ b/mm/vmstat.c
|
||||
@@ -81,6 +81,30 @@ EXPORT_SYMBOL(vm_stat);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
+static int calculate_pressure_threshold(struct zone *zone)
|
||||
+{
|
||||
+ int threshold;
|
||||
+ int watermark_distance;
|
||||
+
|
||||
+ /*
|
||||
+ * As vmstats are not up to date, there is drift between the estimated
|
||||
+ * and real values. For high thresholds and a high number of CPUs, it
|
||||
+ * is possible for the min watermark to be breached while the estimated
|
||||
+ * value looks fine. The pressure threshold is a reduced value such
|
||||
+ * that even the maximum amount of drift will not accidentally breach
|
||||
+ * the min watermark
|
||||
+ */
|
||||
+ watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
|
||||
+ threshold = max(1, (int)(watermark_distance / num_online_cpus()));
|
||||
+
|
||||
+ /*
|
||||
+ * Maximum threshold is 125
|
||||
+ */
|
||||
+ threshold = min(125, threshold);
|
||||
+
|
||||
+ return threshold;
|
||||
+}
|
||||
+
|
||||
static int calculate_threshold(struct zone *zone)
|
||||
{
|
||||
int threshold;
|
||||
@@ -159,6 +183,48 @@ static void refresh_zone_stat_thresholds(void)
|
||||
}
|
||||
}
|
||||
|
||||
+void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
|
||||
+{
|
||||
+ struct zone *zone;
|
||||
+ int cpu;
|
||||
+ int threshold;
|
||||
+ int i;
|
||||
+
|
||||
+ get_online_cpus();
|
||||
+ for (i = 0; i < pgdat->nr_zones; i++) {
|
||||
+ zone = &pgdat->node_zones[i];
|
||||
+ if (!zone->percpu_drift_mark)
|
||||
+ continue;
|
||||
+
|
||||
+ threshold = calculate_pressure_threshold(zone);
|
||||
+ for_each_online_cpu(cpu)
|
||||
+ per_cpu_ptr(zone->pageset, cpu)->stat_threshold
|
||||
+ = threshold;
|
||||
+ }
|
||||
+ put_online_cpus();
|
||||
+}
|
||||
+
|
||||
+void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
|
||||
+{
|
||||
+ struct zone *zone;
|
||||
+ int cpu;
|
||||
+ int threshold;
|
||||
+ int i;
|
||||
+
|
||||
+ get_online_cpus();
|
||||
+ for (i = 0; i < pgdat->nr_zones; i++) {
|
||||
+ zone = &pgdat->node_zones[i];
|
||||
+ if (!zone->percpu_drift_mark)
|
||||
+ continue;
|
||||
+
|
||||
+ threshold = calculate_threshold(zone);
|
||||
+ for_each_online_cpu(cpu)
|
||||
+ per_cpu_ptr(zone->pageset, cpu)->stat_threshold
|
||||
+ = threshold;
|
||||
+ }
|
||||
+ put_online_cpus();
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* For use when we know that interrupts are disabled.
|
||||
*/
|
||||
@@ -826,7 +892,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
|
||||
"\n scanned %lu"
|
||||
"\n spanned %lu"
|
||||
"\n present %lu",
|
||||
- zone_nr_free_pages(zone),
|
||||
+ zone_page_state(zone, NR_FREE_PAGES),
|
||||
min_wmark_pages(zone),
|
||||
low_wmark_pages(zone),
|
||||
high_wmark_pages(zone),
|
||||
--
|
||||
1.7.3.2
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
From 82e3d4969144377d13da97d511e849e8cf3e6dcc Mon Sep 17 00:00:00 2001
|
||||
From: Mel Gorman <mel@csn.ul.ie>
|
||||
Date: Wed, 24 Nov 2010 22:24:24 -0500
|
||||
Subject: [PATCH 2/2] mm: vmstat: Use a single setter function and callback for adjusting percpu thresholds
|
||||
|
||||
reduce_pgdat_percpu_threshold() and restore_pgdat_percpu_threshold() exist
|
||||
to adjust the per-cpu vmstat thresholds while kswapd is awake to avoid
|
||||
errors due to counter drift. The functions duplicate some code so this
|
||||
patch replaces them with a single set_pgdat_percpu_threshold() that takes
|
||||
a callback function to calculate the desired threshold as a parameter.
|
||||
|
||||
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
|
||||
Reviewed-by: Christoph Lameter <cl@linux.com>
|
||||
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
[the various mmotm patches updating this were rolled up. --kyle]
|
||||
[[http://userweb.kernel.org/~akpm/mmotm/broken-out/mm-vmstat-use-a-single-setter-function-and-callback-for-adjusting-percpu-thresholds-fix-set_pgdat_percpu_threshold-dont-use-for_each_online_cpu.patch]]
|
||||
---
|
||||
include/linux/vmstat.h | 10 ++++++----
|
||||
mm/vmscan.c | 19 +++++++++++++++++--
|
||||
mm/vmstat.c | 36 +++++++-----------------------------
|
||||
3 files changed, 30 insertions(+), 35 deletions(-)
|
||||
|
||||
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
|
||||
index e4cc21c..833e676 100644
|
||||
--- a/include/linux/vmstat.h
|
||||
+++ b/include/linux/vmstat.h
|
||||
@@ -254,8 +254,11 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
|
||||
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
|
||||
|
||||
void refresh_cpu_vm_stats(int);
|
||||
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat);
|
||||
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat);
|
||||
+
|
||||
+int calculate_pressure_threshold(struct zone *zone);
|
||||
+int calculate_normal_threshold(struct zone *zone);
|
||||
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
|
||||
+ int (*calculate_pressure)(struct zone *));
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
@@ -300,8 +303,7 @@ static inline void __dec_zone_page_state(struct page *page,
|
||||
#define dec_zone_page_state __dec_zone_page_state
|
||||
#define mod_zone_page_state __mod_zone_page_state
|
||||
|
||||
-static inline void reduce_pgdat_percpu_threshold(pg_data_t *pgdat) { }
|
||||
-static inline void restore_pgdat_percpu_threshold(pg_data_t *pgdat) { }
|
||||
+#define set_pgdat_percpu_threshold(pgdat, callback) { }
|
||||
|
||||
static inline void refresh_cpu_vm_stats(int cpu) { }
|
||||
#endif
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 3e71cb1..ba39948 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -2378,9 +2378,24 @@ static int kswapd(void *p)
|
||||
*/
|
||||
if (!sleeping_prematurely(pgdat, order, remaining)) {
|
||||
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
|
||||
- restore_pgdat_percpu_threshold(pgdat);
|
||||
+
|
||||
+ /*
|
||||
+ * vmstat counters are not perfectly
|
||||
+ * accurate and the estimated value
|
||||
+ * for counters such as NR_FREE_PAGES
|
||||
+ * can deviate from the true value by
|
||||
+ * nr_online_cpus * threshold. To
|
||||
+ * avoid the zone watermarks being
|
||||
+ * breached while under pressure, we
|
||||
+ * reduce the per-cpu vmstat threshold
|
||||
+ * while kswapd is awake and restore
|
||||
+ * them before going back to sleep.
|
||||
+ */
|
||||
+ set_pgdat_percpu_threshold(pgdat,
|
||||
+ calculate_normal_threshold);
|
||||
schedule();
|
||||
- reduce_pgdat_percpu_threshold(pgdat);
|
||||
+ set_pgdat_percpu_threshold(pgdat,
|
||||
+ calculate_pressure_threshold);
|
||||
} else {
|
||||
if (remaining)
|
||||
count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
|
||||
diff --git a/mm/vmstat.c b/mm/vmstat.c
|
||||
index 4d7faeb..511c2c0 100644
|
||||
--- a/mm/vmstat.c
|
||||
+++ b/mm/vmstat.c
|
||||
@@ -81,7 +81,7 @@ EXPORT_SYMBOL(vm_stat);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
-static int calculate_pressure_threshold(struct zone *zone)
|
||||
+int calculate_pressure_threshold(struct zone *zone)
|
||||
{
|
||||
int threshold;
|
||||
int watermark_distance;
|
||||
@@ -105,7 +105,7 @@ static int calculate_pressure_threshold(struct zone *zone)
|
||||
return threshold;
|
||||
}
|
||||
|
||||
-static int calculate_threshold(struct zone *zone)
|
||||
+int calculate_normal_threshold(struct zone *zone)
|
||||
{
|
||||
int threshold;
|
||||
int mem; /* memory in 128 MB units */
|
||||
@@ -164,7 +164,7 @@ static void refresh_zone_stat_thresholds(void)
|
||||
for_each_populated_zone(zone) {
|
||||
unsigned long max_drift, tolerate_drift;
|
||||
|
||||
- threshold = calculate_threshold(zone);
|
||||
+ threshold = calculate_normal_threshold(zone);
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
|
||||
@@ -183,46 +183,24 @@ static void refresh_zone_stat_thresholds(void)
|
||||
}
|
||||
}
|
||||
|
||||
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
|
||||
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
|
||||
+ int (*calculate_pressure)(struct zone *))
|
||||
{
|
||||
struct zone *zone;
|
||||
int cpu;
|
||||
int threshold;
|
||||
int i;
|
||||
|
||||
- get_online_cpus();
|
||||
- for (i = 0; i < pgdat->nr_zones; i++) {
|
||||
- zone = &pgdat->node_zones[i];
|
||||
- if (!zone->percpu_drift_mark)
|
||||
- continue;
|
||||
-
|
||||
- threshold = calculate_pressure_threshold(zone);
|
||||
- for_each_online_cpu(cpu)
|
||||
- per_cpu_ptr(zone->pageset, cpu)->stat_threshold
|
||||
- = threshold;
|
||||
- }
|
||||
- put_online_cpus();
|
||||
-}
|
||||
-
|
||||
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
|
||||
-{
|
||||
- struct zone *zone;
|
||||
- int cpu;
|
||||
- int threshold;
|
||||
- int i;
|
||||
-
|
||||
- get_online_cpus();
|
||||
for (i = 0; i < pgdat->nr_zones; i++) {
|
||||
zone = &pgdat->node_zones[i];
|
||||
if (!zone->percpu_drift_mark)
|
||||
continue;
|
||||
|
||||
- threshold = calculate_threshold(zone);
|
||||
- for_each_online_cpu(cpu)
|
||||
+ threshold = (*calculate_pressure)(zone);
|
||||
+ for_each_possible_cpu(cpu)
|
||||
per_cpu_ptr(zone->pageset, cpu)->stat_threshold
|
||||
= threshold;
|
||||
}
|
||||
- put_online_cpus();
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
1.7.3.2
|
||||
|
3
sources
3
sources
|
@ -1,3 +1,2 @@
|
|||
61f3739a73afb6914cb007f37fb09b62 linux-2.6.36.tar.bz2
|
||||
854ca0c7eca8930a71a6382a7dabbf65 patch-2.6.37-rc4.bz2
|
||||
c3146fe28bb10e77d8388bc26e16483c patch-2.6.37-rc4-git1.bz2
|
||||
a84cf559615b5168ec1d5591841601ed patch-2.6.37-rc5.bz2
|
||||
|
|
|
@ -1,84 +0,0 @@
|
|||
From jirislaby@gmail.com Thu Nov 25 12:16:42 2010
|
||||
From: Jiri Slaby <jslaby@suse.cz>
|
||||
Subject: [PATCH 1/1] TTY: don't allow reopen when ldisc is changing
|
||||
Date: Thu, 25 Nov 2010 18:16:23 +0100
|
||||
|
||||
There are many WARNINGs like the following reported nowadays:
|
||||
WARNING: at drivers/tty/tty_io.c:1331 tty_open+0x2a2/0x49a()
|
||||
Hardware name: Latitude E6500
|
||||
Modules linked in:
|
||||
Pid: 1207, comm: plymouthd Not tainted 2.6.37-rc3-mmotm1123 #3
|
||||
Call Trace:
|
||||
[<ffffffff8103b189>] warn_slowpath_common+0x80/0x98
|
||||
[<ffffffff8103b1b6>] warn_slowpath_null+0x15/0x17
|
||||
[<ffffffff8128a3ab>] tty_open+0x2a2/0x49a
|
||||
[<ffffffff810fd53f>] chrdev_open+0x11d/0x146
|
||||
...
|
||||
|
||||
This means tty_reopen is called without TTY_LDISC set. For further
|
||||
considerations, note tty_lock is held in tty_open. TTY_LDISC is cleared in:
|
||||
1) __tty_hangup from tty_ldisc_hangup to tty_ldisc_enable. During this
|
||||
section tty_lock is held.
|
||||
|
||||
2) tty_release via tty_ldisc_release till the end of tty existence. If
|
||||
tty->count <= 1, tty_lock is taken, TTY_CLOSING bit set and then
|
||||
tty_ldisc_release called. tty_reopen checks TTY_CLOSING before checking
|
||||
TTY_LDISC.
|
||||
|
||||
3) tty_set_ldisc from tty_ldisc_halt to tty_ldisc_enable. We:
|
||||
* take tty_lock, set TTY_LDISC_CHANGING, put tty_lock
|
||||
* call tty_ldisc_halt (clear TTY_LDISC), tty_lock is _not_ held
|
||||
* do some other work
|
||||
* take tty_lock, call tty_ldisc_enable (set TTY_LDISC), put
|
||||
tty_lock
|
||||
|
||||
So the only option I see is 3). The solution is to check
|
||||
TTY_LDISC_CHANGING along with TTY_CLOSING in tty_reopen.
|
||||
|
||||
Nicely reproducible with two processes:
|
||||
while (1) {
|
||||
fd = open("/dev/ttyS1", O_RDWR);
|
||||
if (fd < 0) {
|
||||
warn("open");
|
||||
continue;
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
--------
|
||||
while (1) {
|
||||
fd = open("/dev/ttyS1", O_RDWR);
|
||||
ld1 = 0; ld2 = 2;
|
||||
while (1) {
|
||||
ioctl(fd, TIOCSETD, &ld1);
|
||||
ioctl(fd, TIOCSETD, &ld2);
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
|
||||
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
|
||||
Reported-by: <Valdis.Kletnieks@vt.edu>
|
||||
Cc: Kyle McMartin <kyle@mcmartin.ca>
|
||||
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
|
||||
---
|
||||
drivers/tty/tty_io.c | 3 ++-
|
||||
1 files changed, 2 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
|
||||
index c05c5af..878f6d6 100644
|
||||
--- a/drivers/tty/tty_io.c
|
||||
+++ b/drivers/tty/tty_io.c
|
||||
@@ -1310,7 +1310,8 @@ static int tty_reopen(struct tty_struct *tty)
|
||||
{
|
||||
struct tty_driver *driver = tty->driver;
|
||||
|
||||
- if (test_bit(TTY_CLOSING, &tty->flags))
|
||||
+ if (test_bit(TTY_CLOSING, &tty->flags) ||
|
||||
+ test_bit(TTY_LDISC_CHANGING, &tty->flags))
|
||||
return -EIO;
|
||||
|
||||
if (driver->type == TTY_DRIVER_TYPE_PTY &&
|
||||
--
|
||||
1.7.3.1
|
||||
|
||||
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
From linux-kernel-owner@vger.kernel.org Wed Nov 24 18:28:11 2010
|
||||
From: Jiri Slaby <jslaby@suse.cz>
|
||||
Subject: [PATCH 1/2] TTY: ldisc, fix open flag handling
|
||||
Date: Thu, 25 Nov 2010 00:27:54 +0100
|
||||
|
||||
When a concrete ldisc open fails in tty_ldisc_open, we forget to clear
|
||||
TTY_LDISC_OPEN. This causes a false warning on the next ldisc open:
|
||||
WARNING: at drivers/char/tty_ldisc.c:445 tty_ldisc_open+0x26/0x38()
|
||||
Hardware name: System Product Name
|
||||
Modules linked in: ...
|
||||
Pid: 5251, comm: a.out Tainted: G W 2.6.32-5-686 #1
|
||||
Call Trace:
|
||||
[<c1030321>] ? warn_slowpath_common+0x5e/0x8a
|
||||
[<c1030357>] ? warn_slowpath_null+0xa/0xc
|
||||
[<c119311c>] ? tty_ldisc_open+0x26/0x38
|
||||
[<c11936c5>] ? tty_set_ldisc+0x218/0x304
|
||||
...
|
||||
|
||||
So clear the bit when failing...
|
||||
|
||||
Introduced in c65c9bc3efa (tty: rewrite the ldisc locking) back in
|
||||
2.6.31-rc1.
|
||||
|
||||
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
|
||||
Cc: Alan Cox <alan@linux.intel.com>
|
||||
Reported-by: Sergey Lapin <slapin@ossfans.org>
|
||||
Tested-by: Sergey Lapin <slapin@ossfans.org>
|
||||
---
|
||||
drivers/tty/tty_ldisc.c | 2 ++
|
||||
1 files changed, 2 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
|
||||
index d8e96b0..4214d58 100644
|
||||
--- a/drivers/tty/tty_ldisc.c
|
||||
+++ b/drivers/tty/tty_ldisc.c
|
||||
@@ -454,6 +454,8 @@ static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
|
||||
/* BTM here locks versus a hangup event */
|
||||
WARN_ON(!tty_locked());
|
||||
ret = ld->ops->open(tty);
|
||||
+ if (ret)
|
||||
+ clear_bit(TTY_LDISC_OPEN, &tty->flags);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
--
|
||||
1.7.3.1
|
||||
|
||||
|
||||
--
|
||||
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
|
||||
the body of a message to majordomo@vger.kernel.org
|
||||
More majordomo info at http://vger.kernel.org/majordomo-info.html
|
||||
Please read the FAQ at http://www.tux.org/lkml/
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
From 9e88e8b9915b5e067507a087437d80e6a133d612 Mon Sep 17 00:00:00 2001
|
||||
From: Jiri Slaby <jslaby@suse.cz>
|
||||
Date: Sat, 27 Nov 2010 16:06:46 +0100
|
||||
Subject: [PATCH 1/1] TTY: open/hangup race fixup
|
||||
|
||||
|
||||
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
|
||||
---
|
||||
drivers/tty/tty_io.c | 10 +++++++++-
|
||||
include/linux/tty.h | 1 +
|
||||
2 files changed, 10 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
|
||||
index 878f6d6..35480dd 100644
|
||||
--- a/drivers/tty/tty_io.c
|
||||
+++ b/drivers/tty/tty_io.c
|
||||
@@ -559,6 +559,9 @@ void __tty_hangup(struct tty_struct *tty)
|
||||
|
||||
tty_lock();
|
||||
|
||||
+ /* some functions below drop BTM, so we need this bit */
|
||||
+ set_bit(TTY_HUPPING, &tty->flags);
|
||||
+
|
||||
/* inuse_filps is protected by the single tty lock,
|
||||
this really needs to change if we want to flush the
|
||||
workqueue with the lock held */
|
||||
@@ -578,6 +581,10 @@ void __tty_hangup(struct tty_struct *tty)
|
||||
}
|
||||
spin_unlock(&tty_files_lock);
|
||||
|
||||
+ /*
|
||||
+ * it drops BTM and thus races with reopen
|
||||
+ * we protect the race by TTY_HUPPING
|
||||
+ */
|
||||
tty_ldisc_hangup(tty);
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
@@ -615,7 +622,6 @@ void __tty_hangup(struct tty_struct *tty)
|
||||
tty->session = NULL;
|
||||
tty->pgrp = NULL;
|
||||
tty->ctrl_status = 0;
|
||||
- set_bit(TTY_HUPPED, &tty->flags);
|
||||
spin_unlock_irqrestore(&tty->ctrl_lock, flags);
|
||||
|
||||
/* Account for the p->signal references we killed */
|
||||
@@ -641,6 +647,7 @@ void __tty_hangup(struct tty_struct *tty)
|
||||
* can't yet guarantee all that.
|
||||
*/
|
||||
set_bit(TTY_HUPPED, &tty->flags);
|
||||
+ clear_bit(TTY_HUPPING, &tty->flags);
|
||||
tty_ldisc_enable(tty);
|
||||
|
||||
tty_unlock();
|
||||
@@ -1311,6 +1318,7 @@ static int tty_reopen(struct tty_struct *tty)
|
||||
struct tty_driver *driver = tty->driver;
|
||||
|
||||
if (test_bit(TTY_CLOSING, &tty->flags) ||
|
||||
+ test_bit(TTY_HUPPING, &tty->flags) ||
|
||||
test_bit(TTY_LDISC_CHANGING, &tty->flags))
|
||||
return -EIO;
|
||||
|
||||
diff --git a/include/linux/tty.h b/include/linux/tty.h
|
||||
index 032d79f..54e4eaa 100644
|
||||
--- a/include/linux/tty.h
|
||||
+++ b/include/linux/tty.h
|
||||
@@ -366,6 +366,7 @@ struct tty_file_private {
|
||||
#define TTY_HUPPED 18 /* Post driver->hangup() */
|
||||
#define TTY_FLUSHING 19 /* Flushing to ldisc in progress */
|
||||
#define TTY_FLUSHPENDING 20 /* Queued buffer flush pending */
|
||||
+#define TTY_HUPPING 21 /* ->hangup() in progress */
|
||||
|
||||
#define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
|
||||
|
||||
--
|
||||
1.7.3.1
|
||||
|
Loading…
Reference in New Issue