From 3da886a92458ba1a0a0f62e0cb7905b18577d886 Mon Sep 17 00:00:00 2001 From: Cole Robinson Date: Wed, 13 May 2020 13:34:26 -0400 Subject: [PATCH] qemu-5.0.0-2 Fix iouring hang (bz #1823751) --- ...duplicate-fd-handler-deletion-in-fdm.patch | 64 ++++++++++++++ ...e-fdmon-io_uring-when-GSource-is-use.patch | 88 +++++++++++++++++++ qemu.spec | 10 ++- 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 0001-aio-posix-don-t-duplicate-fd-handler-deletion-in-fdm.patch create mode 100644 0002-aio-posix-disable-fdmon-io_uring-when-GSource-is-use.patch diff --git a/0001-aio-posix-don-t-duplicate-fd-handler-deletion-in-fdm.patch b/0001-aio-posix-don-t-duplicate-fd-handler-deletion-in-fdm.patch new file mode 100644 index 0000000..eff860b --- /dev/null +++ b/0001-aio-posix-don-t-duplicate-fd-handler-deletion-in-fdm.patch @@ -0,0 +1,64 @@ +From: Stefan Hajnoczi +Date: Mon, 11 May 2020 19:36:29 +0100 +Subject: [PATCH] aio-posix: don't duplicate fd handler deletion in + fdmon_io_uring_destroy() + +The io_uring file descriptor monitoring implementation has an internal +list of fd handlers that are pending submission to io_uring. +fdmon_io_uring_destroy() deletes all fd handlers on the list. + +Don't delete fd handlers directly in fdmon_io_uring_destroy() for two +reasons: +1. This duplicates the aio-posix.c AioHandler deletion code and could + become outdated if the struct changes. +2. Only handlers with the FDMON_IO_URING_REMOVE flag set are safe to + remove. If the flag is not set then something still has a pointer to + the fd handler. Let aio-posix.c and its user worry about that. In + practice this isn't an issue because fdmon_io_uring_destroy() is only + called when shutting down so all users have removed their fd + handlers, but the next patch will need this! + +Signed-off-by: Stefan Hajnoczi +--- + util/aio-posix.c | 1 + + util/fdmon-io_uring.c | 13 ++++++++++--- + 2 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/util/aio-posix.c b/util/aio-posix.c +index c3613d299e..8af334ab19 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -679,6 +679,7 @@ void aio_context_destroy(AioContext *ctx) + { + fdmon_io_uring_destroy(ctx); + fdmon_epoll_disable(ctx); ++ aio_free_deleted_handlers(ctx); + } + + void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, +diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c +index d5a80ed6fb..1d14177df0 100644 +--- a/util/fdmon-io_uring.c ++++ b/util/fdmon-io_uring.c +@@ -342,11 +342,18 @@ void fdmon_io_uring_destroy(AioContext *ctx) + + io_uring_queue_exit(&ctx->fdmon_io_uring); + +- /* No need to submit these anymore, just free them. */ ++ /* Move handlers due to be removed onto the deleted list */ + while ((node = QSLIST_FIRST_RCU(&ctx->submit_list))) { ++ unsigned flags = atomic_fetch_and(&node->flags, ++ ~(FDMON_IO_URING_PENDING | ++ FDMON_IO_URING_ADD | ++ FDMON_IO_URING_REMOVE)); ++ ++ if (flags & FDMON_IO_URING_REMOVE) { ++ QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted); ++ } ++ + QSLIST_REMOVE_HEAD_RCU(&ctx->submit_list, node_submitted); +- QLIST_REMOVE(node, node); +- g_free(node); + } + + ctx->fdmon_ops = &fdmon_poll_ops; diff --git a/0002-aio-posix-disable-fdmon-io_uring-when-GSource-is-use.patch b/0002-aio-posix-disable-fdmon-io_uring-when-GSource-is-use.patch new file mode 100644 index 0000000..fe7aff0 --- /dev/null +++ b/0002-aio-posix-disable-fdmon-io_uring-when-GSource-is-use.patch @@ -0,0 +1,88 @@ +From: Stefan Hajnoczi +Date: Mon, 11 May 2020 19:36:30 +0100 +Subject: [PATCH] aio-posix: disable fdmon-io_uring when GSource is used + +The glib event loop does not call fdmon_io_uring_wait() so fd handlers +waiting to be submitted build up in the list. There is no benefit is +using io_uring when the glib GSource is being used, so disable it +instead of implementing a more complex fix. + +This fixes a memory leak where AioHandlers would build up and increasing +amounts of CPU time were spent iterating them in aio_pending(). The +symptom is that guests become slow when QEMU is built with io_uring +support. + +Buglink: https://bugs.launchpad.net/qemu/+bug/1877716 +Fixes: 73fd282e7b6dd4e4ea1c3bbb3d302c8db51e4ccf ("aio-posix: add io_uring fd monitoring implementation") +Signed-off-by: Stefan Hajnoczi +--- + include/block/aio.h | 3 +++ + util/aio-posix.c | 12 ++++++++++++ + util/aio-win32.c | 4 ++++ + util/async.c | 1 + + 4 files changed, 20 insertions(+) + +diff --git a/include/block/aio.h b/include/block/aio.h +index 62ed954344..b2f703fa3f 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -701,6 +701,9 @@ void aio_context_setup(AioContext *ctx); + */ + void aio_context_destroy(AioContext *ctx); + ++/* Used internally, do not call outside AioContext code */ ++void aio_context_use_g_source(AioContext *ctx); ++ + /** + * aio_context_set_poll_params: + * @ctx: the aio context +diff --git a/util/aio-posix.c b/util/aio-posix.c +index 8af334ab19..1b2a3af65b 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -682,6 +682,18 @@ void aio_context_destroy(AioContext *ctx) + aio_free_deleted_handlers(ctx); + } + ++void aio_context_use_g_source(AioContext *ctx) ++{ ++ /* ++ * Disable io_uring when the glib main loop is used because it doesn't ++ * support mixed glib/aio_poll() usage. It relies on aio_poll() being ++ * called regularly so that changes to the monitored file descriptors are ++ * submitted, otherwise a list of pending fd handlers builds up. ++ */ ++ fdmon_io_uring_destroy(ctx); ++ aio_free_deleted_handlers(ctx); ++} ++ + void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + int64_t grow, int64_t shrink, Error **errp) + { +diff --git a/util/aio-win32.c b/util/aio-win32.c +index 729d533faf..953c56ab48 100644 +--- a/util/aio-win32.c ++++ b/util/aio-win32.c +@@ -414,6 +414,10 @@ void aio_context_destroy(AioContext *ctx) + { + } + ++void aio_context_use_g_source(AioContext *ctx) ++{ ++} ++ + void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + int64_t grow, int64_t shrink, Error **errp) + { +diff --git a/util/async.c b/util/async.c +index 3165a28f2f..1319eee3bc 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -362,6 +362,7 @@ static GSourceFuncs aio_source_funcs = { + + GSource *aio_get_g_source(AioContext *ctx) + { ++ aio_context_use_g_source(ctx); + g_source_ref(&ctx->source); + return &ctx->source; + } diff --git a/qemu.spec b/qemu.spec index f20a039..8797351 100644 --- a/qemu.spec +++ b/qemu.spec @@ -161,7 +161,7 @@ Summary: QEMU is a FAST! processor emulator Name: qemu Version: 5.0.0 -Release: 1%{?rcrel}%{?dist} +Release: 2%{?rcrel}%{?dist} Epoch: 2 License: GPLv2 and BSD and MIT and CC-BY URL: http://www.qemu.org/ @@ -185,6 +185,11 @@ Source20: kvm-x86.modprobe.conf # /etc/security/limits.d/95-kvm-ppc64-memlock.conf Source21: 95-kvm-ppc64-memlock.conf +# Fix iouring hang (bz #1823751) +# https://lists.gnu.org/archive/html/qemu-devel/2020-05/msg02728.html +Patch0001: 0001-aio-posix-don-t-duplicate-fd-handler-deletion-in-fdm.patch +Patch0002: 0002-aio-posix-disable-fdmon-io_uring-when-GSource-is-use.patch + # documentation deps BuildRequires: texinfo @@ -1834,6 +1839,9 @@ getent passwd qemu >/dev/null || \ %changelog +* Wed May 13 2020 Cole Robinson - 5.0.0-2 +- Fix iouring hang (bz #1823751) + * Wed May 06 2020 Cole Robinson - 5.0.0-1 - Update to version 5.0.0