Pull in patches to add PollLimit setting

This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2023-09-27 14:05:12 +02:00
parent 9a522c2a5a
commit bb2f5f0fab
4 changed files with 407 additions and 0 deletions

View File

@ -0,0 +1,243 @@
From df25afd2cf5527fe1bb542bb146fef1be8d9a489 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Sat, 9 Sep 2023 14:46:32 +0200
Subject: [PATCH 1/3] core: add new "PollLimit" settings to .socket units
This adds a new "PollLimit" pair of settings to .socket units, very
similar to existing "TriggerLimit" logic. The differences are:
* PollLimit focusses on the polling on the sockets, and pauses that
temporarily if a ratelimit on that is reached. TriggerLimit otoh
focusses on the triggering effect of socket units, and stops
triggering once the ratelimit is hit.
* While the trigger limit being hit is an action that causes the socket
unit to fail, the polling limit being reached will just temporarily
disable polling on the socket fd, and it is resumed once the ratelimit
interval is over.
* When a socket unit operates on multiple socket fds (e.g. ListenStream=
on both an IPv6 and an IPv4 address or so), the PollLimit will be
specific to each fd, while the trigger limit is specific to the whole
unit.
Implementation-wise this is mostly a wrapper around sd-event's
sd_event_source_set_ratelimit(), which exposes the desired behaviour
directly.
Usecase for all of this: socket services which, when overloaded with
connections, should just slow down reception of them, but not fail
persistently.
(cherry picked from commit 2bec84e7a5bf3687ae65205753ba3d8067cf2f0e)
---
man/org.freedesktop.systemd1.xml | 12 ++++++++++
src/core/dbus-socket.c | 8 +++++++
src/core/load-fragment-gperf.gperf.in | 2 ++
src/core/socket.c | 32 +++++++++++++++++++--------
src/core/socket.h | 2 ++
src/shared/bus-unit-util.c | 10 +++++----
6 files changed, 53 insertions(+), 13 deletions(-)
diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml
index 56906e2f3b..0557dc2379 100644
--- a/man/org.freedesktop.systemd1.xml
+++ b/man/org.freedesktop.systemd1.xml
@@ -4727,6 +4727,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
readonly t TriggerLimitIntervalUSec = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly u TriggerLimitBurst = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly t PollLimitIntervalUSec = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly u PollLimitBurst = ...;
readonly u UID = ...;
readonly u GID = ...;
@org.freedesktop.DBus.Property.EmitsChangedSignal("invalidates")
@@ -5961,6 +5965,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<variablelist class="dbus-property" generated="True" extra-ref="TriggerLimitBurst"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="PollLimitIntervalUSec"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="PollLimitBurst"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="UID"/>
<variablelist class="dbus-property" generated="True" extra-ref="GID"/>
@@ -6497,6 +6505,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
<!--End of Autogenerated section-->
+ <para><varname>PollLimitIntervalUSec</varname>/<varname>PollLimitBurst</varname> properties configure the
+ polling limit for the socket unit. Expects a time in µs, resp. an unsigned integer. If either is set to
+ zero the limiting feature is turned off.</para>
+
<refsect2>
<title>Properties</title>
diff --git a/src/core/dbus-socket.c b/src/core/dbus-socket.c
index 09a3a9502b..04552b7c60 100644
--- a/src/core/dbus-socket.c
+++ b/src/core/dbus-socket.c
@@ -129,6 +129,8 @@ const sd_bus_vtable bus_socket_vtable[] = {
SD_BUS_PROPERTY("SocketProtocol", "i", bus_property_get_int, offsetof(Socket, socket_protocol), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TriggerLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, trigger_limit.interval), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("TriggerLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, trigger_limit.burst), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PollLimitIntervalUSec", "t", bus_property_get_usec, offsetof(Socket, poll_limit_interval), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PollLimitBurst", "u", bus_property_get_unsigned, offsetof(Socket, poll_limit_burst), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Socket, exec_command[SOCKET_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
@@ -248,6 +250,9 @@ static int bus_socket_set_transient_property(
if (streq(name, "TriggerLimitBurst"))
return bus_set_transient_unsigned(u, name, &s->trigger_limit.burst, message, flags, error);
+ if (streq(name, "PollLimitBurst"))
+ return bus_set_transient_unsigned(u, name, &s->poll_limit_burst, message, flags, error);
+
if (streq(name, "SocketMode"))
return bus_set_transient_mode_t(u, name, &s->socket_mode, message, flags, error);
@@ -275,6 +280,9 @@ static int bus_socket_set_transient_property(
if (streq(name, "TriggerLimitIntervalUSec"))
return bus_set_transient_usec(u, name, &s->trigger_limit.interval, message, flags, error);
+ if (streq(name, "PollLimitIntervalUSec"))
+ return bus_set_transient_usec(u, name, &s->poll_limit_interval, message, flags, error);
+
if (streq(name, "SmackLabel"))
return bus_set_transient_string(u, name, &s->smack, message, flags, error);
diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in
index b66adf2811..0d1ee9c231 100644
--- a/src/core/load-fragment-gperf.gperf.in
+++ b/src/core/load-fragment-gperf.gperf.in
@@ -507,6 +507,8 @@ Socket.FileDescriptorName, config_parse_fdname,
Socket.Service, config_parse_socket_service, 0, 0
Socket.TriggerLimitIntervalSec, config_parse_sec, 0, offsetof(Socket, trigger_limit.interval)
Socket.TriggerLimitBurst, config_parse_unsigned, 0, offsetof(Socket, trigger_limit.burst)
+Socket.PollLimitIntervalSec, config_parse_sec, 0, offsetof(Socket, poll_limit_interval)
+Socket.PollLimitBurst, config_parse_unsigned, 0, offsetof(Socket, poll_limit_burst)
{% if ENABLE_SMACK %}
Socket.SmackLabel, config_parse_unit_string_printf, 0, offsetof(Socket, smack)
Socket.SmackLabelIPIn, config_parse_unit_string_printf, 0, offsetof(Socket, smack_ip_in)
diff --git a/src/core/socket.c b/src/core/socket.c
index 75034ac357..dc18744f54 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -101,6 +101,9 @@ static void socket_init(Unit *u) {
s->trigger_limit.interval = USEC_INFINITY;
s->trigger_limit.burst = UINT_MAX;
+
+ s->poll_limit_interval = USEC_INFINITY;
+ s->poll_limit_burst = UINT_MAX;
}
static void socket_unwatch_control_pid(Socket *s) {
@@ -310,17 +313,20 @@ static int socket_add_extras(Socket *s) {
* off the queues, which it might not necessarily do. Moreover, while Accept=no services are supposed to
* process whatever is queued in one go, and thus should normally never have to be started frequently. This is
* different for Accept=yes where each connection is processed by a new service instance, and thus frequent
- * service starts are typical. */
+ * service starts are typical.
+ *
+ * For the poll limit we follow a similar rule, but use 3/4th of the trigger limit parameters, to
+ * trigger this earlier. */
if (s->trigger_limit.interval == USEC_INFINITY)
s->trigger_limit.interval = 2 * USEC_PER_SEC;
+ if (s->trigger_limit.burst == UINT_MAX)
+ s->trigger_limit.burst = s->accept ? 200 : 20;
- if (s->trigger_limit.burst == UINT_MAX) {
- if (s->accept)
- s->trigger_limit.burst = 200;
- else
- s->trigger_limit.burst = 20;
- }
+ if (s->poll_limit_interval == USEC_INFINITY)
+ s->poll_limit_interval = 2 * USEC_PER_SEC;
+ if (s->poll_limit_burst == UINT_MAX)
+ s->poll_limit_burst = s->accept ? 150 : 15;
if (have_non_accept_socket(s)) {
@@ -770,9 +776,13 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
fprintf(f,
"%sTriggerLimitIntervalSec: %s\n"
- "%sTriggerLimitBurst: %u\n",
+ "%sTriggerLimitBurst: %u\n"
+ "%sPollLimitIntervalSec: %s\n"
+ "%sPollLimitBurst: %u\n",
prefix, FORMAT_TIMESPAN(s->trigger_limit.interval, USEC_PER_SEC),
- prefix, s->trigger_limit.burst);
+ prefix, s->trigger_limit.burst,
+ prefix, FORMAT_TIMESPAN(s->poll_limit_interval, USEC_PER_SEC),
+ prefix, s->poll_limit_burst);
str = ip_protocol_to_name(s->socket_protocol);
if (str)
@@ -1765,6 +1775,10 @@ static int socket_watch_fds(Socket *s) {
(void) sd_event_source_set_description(p->event_source, "socket-port-io");
}
+
+ r = sd_event_source_set_ratelimit(p->event_source, s->poll_limit_interval, s->poll_limit_burst);
+ if (r < 0)
+ log_unit_debug_errno(UNIT(s), r, "Failed to set poll limit on I/O event source, ignoring: %m");
}
return 0;
diff --git a/src/core/socket.h b/src/core/socket.h
index 191d27f46d..b03a291e4a 100644
--- a/src/core/socket.h
+++ b/src/core/socket.h
@@ -158,6 +158,8 @@ struct Socket {
char *fdname;
RateLimit trigger_limit;
+ usec_t poll_limit_interval;
+ unsigned poll_limit_burst;
};
SocketPeer *socket_peer_ref(SocketPeer *p);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index e7b44cc39b..9f0f37488d 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -2170,10 +2170,10 @@ static int bus_append_path_property(sd_bus_message *m, const char *field, const
return 1;
}
- if (streq(field, "TriggerLimitBurst"))
+ if (STR_IN_SET(field, "TriggerLimitBurst", "PollLimitBurst"))
return bus_append_safe_atou(m, field, eq);
- if (streq(field, "TriggerLimitIntervalSec"))
+ if (STR_IN_SET(field, "TriggerLimitIntervalSec", "PollLimitIntervalSec"))
return bus_append_parse_sec_rename(m, field, eq);
return 0;
@@ -2382,7 +2382,8 @@ static int bus_append_socket_property(sd_bus_message *m, const char *field, cons
"MaxConnections",
"MaxConnectionsPerSource",
"KeepAliveProbes",
- "TriggerLimitBurst"))
+ "TriggerLimitBurst",
+ "PollLimitBurst"))
return bus_append_safe_atou(m, field, eq);
if (STR_IN_SET(field, "SocketMode",
@@ -2397,7 +2398,8 @@ static int bus_append_socket_property(sd_bus_message *m, const char *field, cons
"KeepAliveTimeSec",
"KeepAliveIntervalSec",
"DeferAcceptSec",
- "TriggerLimitIntervalSec"))
+ "TriggerLimitIntervalSec",
+ "PollLimitIntervalSec"))
return bus_append_parse_sec_rename(m, field, eq);
if (STR_IN_SET(field, "ReceiveBuffer",

View File

@ -0,0 +1,80 @@
From f6b09a2ed646f0a0b54605d4c19a898ab2bbf192 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 18 Sep 2023 17:51:49 +0200
Subject: [PATCH 2/3] man: document the new
PollLimitIntervalSec=/PollLimitBurst= settings
(cherry picked from commit 9373fce68de183a615d44fe100dcf22e3c9b8c3e)
---
man/systemd.socket.xml | 58 ++++++++++++++++++++++++++++++++++--------
1 file changed, 47 insertions(+), 11 deletions(-)
diff --git a/man/systemd.socket.xml b/man/systemd.socket.xml
index 45555302f1..462978d438 100644
--- a/man/systemd.socket.xml
+++ b/man/systemd.socket.xml
@@ -830,17 +830,53 @@
<term><varname>TriggerLimitIntervalSec=</varname></term>
<term><varname>TriggerLimitBurst=</varname></term>
- <listitem><para>Configures a limit on how often this socket unit may be activated within a specific time
- interval. The <varname>TriggerLimitIntervalSec=</varname> may be used to configure the length of the time
- interval in the usual time units <literal>us</literal>, <literal>ms</literal>, <literal>s</literal>,
- <literal>min</literal>, <literal>h</literal>, … and defaults to 2s (See
- <citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry> for details on
- the various time units understood). The <varname>TriggerLimitBurst=</varname> setting takes a positive integer
- value and specifies the number of permitted activations per time interval, and defaults to 200 for
- <varname>Accept=yes</varname> sockets (thus by default permitting 200 activations per 2s), and 20 otherwise (20
- activations per 2s). Set either to 0 to disable any form of trigger rate limiting. If the limit is hit, the
- socket unit is placed into a failure mode, and will not be connectible anymore until restarted. Note that this
- limit is enforced before the service activation is enqueued.</para></listitem>
+ <listitem><para>Configures a limit on how often this socket unit may be activated within a specific
+ time interval. The <varname>TriggerLimitIntervalSec=</varname> setting may be used to configure the
+ length of the time interval in the usual time units <literal>us</literal>, <literal>ms</literal>,
+ <literal>s</literal>, <literal>min</literal>, <literal>h</literal>, … and defaults to 2s (See
+ <citerefentry><refentrytitle>systemd.time</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
+ details on the various time units understood). The <varname>TriggerLimitBurst=</varname> setting
+ takes a positive integer value and specifies the number of permitted activations per time interval,
+ and defaults to 200 for <varname>Accept=yes</varname> sockets (thus by default permitting 200
+ activations per 2s), and 20 otherwise (20 activations per 2s). Set either to 0 to disable any form of
+ trigger rate limiting.</para>
+
+ <para>If the limit is hit, the socket unit is placed into a failure mode, and will not be connectible
+ anymore until restarted. Note that this limit is enforced before the service activation is
+ enqueued.</para>
+
+ <para>Compare with <varname>PollLimitIntervalSec=</varname>/<varname>PollLimitBurst=</varname>
+ described below, which implements a temporary slowdown if a socket unit is flooded with incoming
+ traffic, as opposed to the permanent failure state
+ <varname>TriggerLimitIntervalSec=</varname>/<varname>TriggerLimitBurst=</varname> results in.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>PollLimitIntervalSec=</varname></term>
+ <term><varname>PollLimitBurst=</varname></term>
+
+ <listitem><para>Configures a limit on how often polling events on the file descriptors backing this
+ socket unit will be considered. This pair of settings is similar to
+ <varname>TriggerLimitIntervalSec=</varname>/<varname>TriggerLimitBurst=</varname> but instead of
+ putting a (fatal) limit on the activation frequency puts a (transient) limit on the polling
+ frequency. The expected parameter syntax and range are identical to that of the aforementioned
+ options, and can be disabled the same way.</para>
+
+ <para>If the polling limit is hit, polling is temporarily disabled on it until the specified time
+ window passes. The polling limit hence slows down connection attempts if hit, but unlike the trigger
+ limit won't cause permanent failures. It's the recommended mechanism to deal with DoS attempts
+ through packet flooding.</para>
+
+ <para>The polling limit is enforced per file descriptor to listen on, as opposed to the trigger limit
+ which is enforced for the entire socket unit. This distinction matters for socket units that listen
+ on multiple file descriptors (i.e. have multiple <varname>ListenXYZ=</varname> stanzas).</para>
+
+ <para>These settings default to 150 (in case of <varname>Accept=yes</varname>) and 15 (otherwise)
+ polling events per 2s. This is considerably lower than the default values for the trigger limit (see
+ above) and means that the polling limit should typically ensure the trigger limit is never hit,
+ unless one of them is reconfigured or disabled.</para>
+ </listitem>
</varlistentry>
</variablelist>

View File

@ -0,0 +1,79 @@
From ae92a9714744bbf92fe69ffe276a668b031a6d26 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 18 Sep 2023 18:05:27 +0200
Subject: [PATCH 3/3] ci: add test for poll limit
(cherry picked from commit 065e478a4a8cc8e41a6e87756c081396f253e853)
---
test/TEST-07-PID1/test.sh | 2 ++
test/units/testsuite-07.poll-limit.sh | 48 +++++++++++++++++++++++++++
2 files changed, 50 insertions(+)
create mode 100755 test/units/testsuite-07.poll-limit.sh
diff --git a/test/TEST-07-PID1/test.sh b/test/TEST-07-PID1/test.sh
index 1c3d7137fe..d0e35d870f 100755
--- a/test/TEST-07-PID1/test.sh
+++ b/test/TEST-07-PID1/test.sh
@@ -32,6 +32,8 @@ Alias=issue2730-alias.mount
EOF
"${SYSTEMCTL:?}" enable --root="$workspace" issue2730.mount
ln -svrf "$workspace/etc/systemd/system/issue2730.mount" "$workspace/etc/systemd/system/issue2730-alias.mount"
+
+ image_install logger
}
do_test "$@"
diff --git a/test/units/testsuite-07.poll-limit.sh b/test/units/testsuite-07.poll-limit.sh
new file mode 100755
index 0000000000..480d7ee8df
--- /dev/null
+++ b/test/units/testsuite-07.poll-limit.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eux
+set -o pipefail
+
+systemd-analyze log-level debug
+
+cat > /run/systemd/system/floodme@.service <<EOF
+[Service]
+ExecStart=/bin/true
+EOF
+
+cat > /run/systemd/system/floodme.socket <<EOF
+[Socket]
+ListenStream=/tmp/floodme
+PollLimitIntervalSec=10s
+Accept=yes
+PollLimitBurst=3
+EOF
+
+systemctl daemon-reload
+systemctl start floodme.socket
+
+START=$(date +%s%N)
+
+# Trigger this 100 times in a flood
+for (( i=0 ; i < 100; i++ )) ; do
+ logger -u /tmp/floodme foo &
+done
+
+# Let some time pass
+sleep 5
+
+END=$(date +%s%N)
+
+PASSED=$((END-START))
+
+# Calculate (round up) how many trigger events could have happened in the passed time
+MAXCOUNT=$(((PASSED+10000000000)*3/10000000000))
+
+# We started 100 connection attempts, but only 3 should have gone through, as per limit
+test "$(systemctl show -P NAccepted floodme.socket)" -le "$MAXCOUNT"
+
+systemctl stop floodme.socket floodme@*.service
+
+rm /run/systemd/system/floodme@.service /run/systemd/system/floodme.socket /tmp/floodme
+
+systemctl daemon-reload

View File

@ -113,6 +113,11 @@ Patch0002: https://github.com/systemd/systemd/pull/28521/commits/631d2b05ec
Patch0003: 0001-find_legacy_keymap-fix-empty-variant-matching.patch
Patch0004: 0002-find_legacy_keymap-try-matching-with-layout-order-re.patch
# Requested as an alternative to https://fedoraproject.org/wiki/Changes/Drop_Sshd_Socket
Patch0005: 0001-core-add-new-PollLimit-settings-to-.socket-units.patch
Patch0006: 0002-man-document-the-new-PollLimitIntervalSec-PollLimitB.patch
Patch0007: 0003-ci-add-test-for-poll-limit.patch
# Those are downstream-only patches, but we don't want them in packit builds:
# https://bugzilla.redhat.com/show_bug.cgi?id=1738828
Patch0490: use-bfq-scheduler.patch