This commit is contained in:
parent
eaa6ce4fe2
commit
d481c533c1
|
@ -0,0 +1,112 @@
|
|||
From ecbddbb106114f90008024b4e6c3ba1c38d7ca0e Mon Sep 17 00:00:00 2001
|
||||
From: "Richard W.M. Jones" <rjones@redhat.com>
|
||||
Date: Fri, 31 Mar 2017 21:51:33 +0100
|
||||
Subject: [PATCH] main-loop: Acquire main_context lock around
|
||||
os_host_main_loop_wait.
|
||||
|
||||
When running virt-rescue the serial console hangs from time to time.
|
||||
Virt-rescue runs an ordinary Linux kernel "appliance", but there is
|
||||
only a single idle process running inside, so the qemu main loop is
|
||||
largely idle. With virt-rescue >= 1.37 you may be able to observe the
|
||||
hang by doing:
|
||||
|
||||
$ virt-rescue -e ^] --scratch
|
||||
><rescue> while true; do ls -l /usr/bin; done
|
||||
|
||||
The hang in virt-rescue can be resolved by pressing a key on the
|
||||
serial console.
|
||||
|
||||
Possibly with the same root cause, we also observed hangs during very
|
||||
early boot of regular Linux VMs with a serial console. Those hangs
|
||||
are extremely rare, but you may be able to observe them by running
|
||||
this command on baremetal for a sufficiently long time:
|
||||
|
||||
$ while libguestfs-test-tool -t 60 >& /tmp/log ; do echo -n . ; done
|
||||
|
||||
(Check in /tmp/log that the failure was caused by a hang during early
|
||||
boot, and not by some other reason.)
|
||||
|
||||
During investigation of this bug, Paolo Bonzini wrote:
|
||||
|
||||
> glib is expecting QEMU to use g_main_context_acquire around accesses to
|
||||
> GMainContext. However QEMU is not doing that, instead it is taking its
|
||||
> own mutex. So we should add g_main_context_acquire and
|
||||
> g_main_context_release in the two implementations of
|
||||
> os_host_main_loop_wait; these should undo the effect of Frediano's
|
||||
> glib patch.
|
||||
|
||||
This patch exactly implements Paolo's suggestion in that paragraph.
|
||||
|
||||
This fixes the serial console hang in my testing, across 3 different
|
||||
physical machines (AMD, Intel Core i7 and Intel Xeon), over many hours
|
||||
of automated testing. I wasn't able to reproduce the early boot hangs
|
||||
(but as noted above, these are extremely rare in any case).
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1435432
|
||||
Reported-by: Richard W.M. Jones <rjones@redhat.com>
|
||||
Tested-by: Richard W.M. Jones <rjones@redhat.com>
|
||||
Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
|
||||
Message-Id: <20170331205133.23906-1-rjones@redhat.com>
|
||||
[Paolo: this is actually a glib bug: recent glib versions are also
|
||||
expecting g_main_context_acquire around g_poll---but that is not
|
||||
documented and probably not even intended].
|
||||
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||
---
|
||||
main-loop.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
diff --git a/main-loop.c b/main-loop.c
|
||||
index 4534c89..19cad6b 100644
|
||||
--- a/main-loop.c
|
||||
+++ b/main-loop.c
|
||||
@@ -218,9 +218,12 @@ static void glib_pollfds_poll(void)
|
||||
|
||||
static int os_host_main_loop_wait(int64_t timeout)
|
||||
{
|
||||
+ GMainContext *context = g_main_context_default();
|
||||
int ret;
|
||||
static int spin_counter;
|
||||
|
||||
+ g_main_context_acquire(context);
|
||||
+
|
||||
glib_pollfds_fill(&timeout);
|
||||
|
||||
/* If the I/O thread is very busy or we are incorrectly busy waiting in
|
||||
@@ -256,6 +259,9 @@ static int os_host_main_loop_wait(int64_t timeout)
|
||||
}
|
||||
|
||||
glib_pollfds_poll();
|
||||
+
|
||||
+ g_main_context_release(context);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
@@ -412,12 +418,15 @@ static int os_host_main_loop_wait(int64_t timeout)
|
||||
fd_set rfds, wfds, xfds;
|
||||
int nfds;
|
||||
|
||||
+ g_main_context_acquire(context);
|
||||
+
|
||||
/* XXX: need to suppress polling by better using win32 events */
|
||||
ret = 0;
|
||||
for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
|
||||
ret |= pe->func(pe->opaque);
|
||||
}
|
||||
if (ret != 0) {
|
||||
+ g_main_context_release(context);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -472,6 +481,8 @@ static int os_host_main_loop_wait(int64_t timeout)
|
||||
g_main_context_dispatch(context);
|
||||
}
|
||||
|
||||
+ g_main_context_release(context);
|
||||
+
|
||||
return select_ret || g_poll_ret;
|
||||
}
|
||||
#endif
|
||||
--
|
||||
2.9.3
|
||||
|
|
@ -68,7 +68,7 @@
|
|||
Summary: QEMU is a FAST! processor emulator
|
||||
Name: qemu
|
||||
Version: 2.7.1
|
||||
Release: 4%{?rcrel}%{?dist}
|
||||
Release: 5%{?rcrel}%{?dist}
|
||||
Epoch: 2
|
||||
License: GPLv2+ and LGPLv2+ and BSD
|
||||
Group: Development/Tools
|
||||
|
@ -243,6 +243,8 @@ Patch0065: 0065-Revert-cirrus-allow-zero-source-pitch-in-pattern-fil.patch
|
|||
Patch0066: 0066-cirrus-add-blit_is_unsafe-call-to-cirrus_bitblt_cput.patch
|
||||
# Fix spice GL with new mesa/libglvnd (bz #1431905)
|
||||
Patch0067: 0067-egl-helpers-Support-newer-MESA-versions.patch
|
||||
# Workaround hangs with recent glib (bz #1435432, gnome.org bz #761102)
|
||||
Patch0068: 0068-main-loop-Acquire-main_context-lock-around-os_host_m.patch
|
||||
|
||||
# documentation deps
|
||||
BuildRequires: texinfo
|
||||
|
@ -1713,6 +1715,9 @@ getent passwd qemu >/dev/null || \
|
|||
|
||||
|
||||
%changelog
|
||||
* Tue Apr 4 2017 Paolo Bonzini <pbonzini@redhat.com> - 2:2.7.1-5
|
||||
- Workaround hangs with recent glib (bz #1435432, gnome.org bz #761102)
|
||||
|
||||
* Wed Mar 15 2017 Cole Robinson <crobinso@redhat.com> - 2:2.7.1-4
|
||||
- CVE-2016-7907: net: imx: infinite loop (bz #1381182)
|
||||
- CVE-2017-5525: audio: memory leakage in ac97 (bz #1414110)
|
||||
|
|
Loading…
Reference in New Issue