kernel/RFC-audit-fix-a-race-condition-with-the-auditd-tracking-code.patch

From patchwork Thu Jun 15 15:28:58 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: [RFC] audit: fix a race condition with the auditd tracking code
From: Paul Moore <pmoore@redhat.com>
X-Patchwork-Id: 9789009
Message-Id: <149754053819.11365.5047864735077505545.stgit@sifl>
To: linux-audit@redhat.com
Cc: Dusty Mabe <dustymabe@redhat.com>
Date: Thu, 15 Jun 2017 11:28:58 -0400

From: Paul Moore <paul@paul-moore.com>

Originally reported by Adam and Dusty, it appears we have a small
race window in kauditd_thread(), as documented in the Fedora BZ:

 * https://bugzilla.redhat.com/show_bug.cgi?id=1459326#c35

 "This issue is partly due to the read-copy nature of RCU, and
  partly due to how we sync the auditd_connection state across
  kauditd_thread and the audit control channel.  The kauditd_thread
  thread is always running so it can service the record queues and
  emit the multicast messages, if it happens to be just past the
  "main_queue" label, but before the "if (sk == NULL || ...)"
  if-statement which calls auditd_reset() when the new auditd
  connection is registered it could end up resetting the auditd
  connection, regardless of if it is valid or not.  This is a rather
  small window and the variable nature of multi-core scheduling
  explains why this is proving rather difficult to reproduce."

The fix is to have functions only call auditd_reset() when they
believe that the kernel/auditd connection is still valid, e.g.
non-NULL, and to have these callers pass their local copy of the
auditd_connection pointer to auditd_reset() where it can be compared
with the current connection state before resetting.  If the caller
has a stale state tracking pointer then the reset is ignored.

We also make a small change to kauditd_thread() so that if the
kernel/auditd connection is dead we skip the retry queue and send the
records straight to the hold queue.  This is necessary as we used to
rely on auditd_reset() to occasionally purge the retry queue but we
are going to be calling the reset function much less now and we want
to make sure the retry queue doesn't grow unbounded.

Reported-by: Adam Williamson <awilliam@redhat.com>
Reported-by: Dusty Mabe <dustymabe@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
---
 kernel/audit.c |   36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)


--
Linux-audit mailing list
Linux-audit@redhat.com
https://www.redhat.com/mailman/listinfo/linux-audit

diff --git a/kernel/audit.c b/kernel/audit.c
index b2e877100242..e1e2b3abfb93 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -575,12 +575,16 @@ static void kauditd_retry_skb(struct sk_buff *skb)
 
 /**
  * auditd_reset - Disconnect the auditd connection
+ * @ac: auditd connection state
  *
  * Description:
  * Break the auditd/kauditd connection and move all the queued records into the
- * hold queue in case auditd reconnects.
+ * hold queue in case auditd reconnects.  It is important to note that the @ac
+ * pointer should never be dereferenced inside this function as it may be NULL
+ * or invalid, you can only compare the memory address!  If @ac is NULL then
+ * the connection will always be reset.
  */
-static void auditd_reset(void)
+static void auditd_reset(const struct auditd_connection *ac)
 {
 	unsigned long flags;
 	struct sk_buff *skb;
@@ -590,6 +594,11 @@ static void auditd_reset(void)
 	spin_lock_irqsave(&auditd_conn_lock, flags);
 	ac_old = rcu_dereference_protected(auditd_conn,
 					   lockdep_is_held(&auditd_conn_lock));
+	if (ac && ac != ac_old) {
+		/* someone already registered a new auditd connection */
+		spin_unlock_irqrestore(&auditd_conn_lock, flags);
+		return;
+	}
 	rcu_assign_pointer(auditd_conn, NULL);
 	spin_unlock_irqrestore(&auditd_conn_lock, flags);
 
@@ -649,8 +658,8 @@ static int auditd_send_unicast_skb(struct sk_buff *skb)
 	return rc;
 
 err:
-	if (rc == -ECONNREFUSED)
-		auditd_reset();
+	if (ac && rc == -ECONNREFUSED)
+		auditd_reset(ac);
 	return rc;
 }
 
@@ -795,9 +804,9 @@ static int kauditd_thread(void *dummy)
 		rc = kauditd_send_queue(sk, portid,
 					&audit_hold_queue, UNICAST_RETRIES,
 					NULL, kauditd_rehold_skb);
-		if (rc < 0) {
+		if (ac && rc < 0) {
 			sk = NULL;
-			auditd_reset();
+			auditd_reset(ac);
 			goto main_queue;
 		}
 
@@ -805,9 +814,9 @@ static int kauditd_thread(void *dummy)
 		rc = kauditd_send_queue(sk, portid,
 					&audit_retry_queue, UNICAST_RETRIES,
 					NULL, kauditd_hold_skb);
-		if (rc < 0) {
+		if (ac && rc < 0) {
 			sk = NULL;
-			auditd_reset();
+			auditd_reset(ac);
 			goto main_queue;
 		}
 
@@ -815,12 +824,13 @@ static int kauditd_thread(void *dummy)
 		/* process the main queue - do the multicast send and attempt
 		 * unicast, dump failed record sends to the retry queue; if
 		 * sk == NULL due to previous failures we will just do the
-		 * multicast send and move the record to the retry queue */
+		 * multicast send and move the record to the hold queue */
 		rc = kauditd_send_queue(sk, portid, &audit_queue, 1,
 					kauditd_send_multicast_skb,
-					kauditd_retry_skb);
-		if (sk == NULL || rc < 0)
-			auditd_reset();
+					(sk ?
+					 kauditd_retry_skb : kauditd_hold_skb));
+		if (ac && rc < 0)
+			auditd_reset(ac);
 		sk = NULL;
 
 		/* drop our netns reference, no auditd sends past this line */
@@ -1230,7 +1240,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 								auditd_pid, 1);
 
 				/* unregister the auditd connection */
-				auditd_reset();
+				auditd_reset(NULL);
 			}
 		}
 		if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
Disable debugging options. 2017-06-19 15:37:48 +00:00			`From patchwork Thu Jun 15 15:28:58 2017`
			`Content-Type: text/plain; charset="utf-8"`
			`MIME-Version: 1.0`
			`Content-Transfer-Encoding: 7bit`
			`Subject: [RFC] audit: fix a race condition with the auditd tracking code`
			`From: Paul Moore <pmoore@redhat.com>`
			`X-Patchwork-Id: 9789009`
			`Message-Id: <149754053819.11365.5047864735077505545.stgit@sifl>`
			`To: linux-audit@redhat.com`
			`Cc: Dusty Mabe <dustymabe@redhat.com>`
			`Date: Thu, 15 Jun 2017 11:28:58 -0400`

			`From: Paul Moore <paul@paul-moore.com>`

			`Originally reported by Adam and Dusty, it appears we have a small`
			`race window in kauditd_thread(), as documented in the Fedora BZ:`

			`* https://bugzilla.redhat.com/show_bug.cgi?id=1459326#c35`

			`"This issue is partly due to the read-copy nature of RCU, and`
			`partly due to how we sync the auditd_connection state across`
			`kauditd_thread and the audit control channel. The kauditd_thread`
			`thread is always running so it can service the record queues and`
			`emit the multicast messages, if it happens to be just past the`
			`"main_queue" label, but before the "if (sk == NULL \|\| ...)"`
			`if-statement which calls auditd_reset() when the new auditd`
			`connection is registered it could end up resetting the auditd`
			`connection, regardless of if it is valid or not. This is a rather`
			`small window and the variable nature of multi-core scheduling`
			`explains why this is proving rather difficult to reproduce."`

			`The fix is to have functions only call auditd_reset() when they`
			`believe that the kernel/auditd connection is still valid, e.g.`
			`non-NULL, and to have these callers pass their local copy of the`
			`auditd_connection pointer to auditd_reset() where it can be compared`
			`with the current connection state before resetting. If the caller`
			`has a stale state tracking pointer then the reset is ignored.`

			`We also make a small change to kauditd_thread() so that if the`
			`kernel/auditd connection is dead we skip the retry queue and send the`
			`records straight to the hold queue. This is necessary as we used to`
			`rely on auditd_reset() to occasionally purge the retry queue but we`
			`are going to be calling the reset function much less now and we want`
			`to make sure the retry queue doesn't grow unbounded.`

			`Reported-by: Adam Williamson <awilliam@redhat.com>`
			`Reported-by: Dusty Mabe <dustymabe@redhat.com>`
			`Signed-off-by: Paul Moore <paul@paul-moore.com>`
			`Reviewed-by: Richard Guy Briggs <rgb@redhat.com>`
			`---`
			`kernel/audit.c \| 36 +++++++++++++++++++++++-------------`
			`1 file changed, 23 insertions(+), 13 deletions(-)`


			`--`
			`Linux-audit mailing list`
			`Linux-audit@redhat.com`
			`https://www.redhat.com/mailman/listinfo/linux-audit`

			`diff --git a/kernel/audit.c b/kernel/audit.c`
			`index b2e877100242..e1e2b3abfb93 100644`
			`--- a/kernel/audit.c`
			`+++ b/kernel/audit.c`
			`@@ -575,12 +575,16 @@ static void kauditd_retry_skb(struct sk_buff *skb)`

			`/**`
			`* auditd_reset - Disconnect the auditd connection`
			`+ * @ac: auditd connection state`
			`*`
			`* Description:`
			`* Break the auditd/kauditd connection and move all the queued records into the`
			`- * hold queue in case auditd reconnects.`
			`+ * hold queue in case auditd reconnects. It is important to note that the @ac`
			`+ * pointer should never be dereferenced inside this function as it may be NULL`
			`+ * or invalid, you can only compare the memory address! If @ac is NULL then`
			`+ * the connection will always be reset.`
			`*/`
			`-static void auditd_reset(void)`
			`+static void auditd_reset(const struct auditd_connection *ac)`
			`{`
			`unsigned long flags;`
			`struct sk_buff *skb;`
			`@@ -590,6 +594,11 @@ static void auditd_reset(void)`
			`spin_lock_irqsave(&auditd_conn_lock, flags);`
			`ac_old = rcu_dereference_protected(auditd_conn,`
			`lockdep_is_held(&auditd_conn_lock));`
			`+ if (ac && ac != ac_old) {`
			`+ /* someone already registered a new auditd connection */`
			`+ spin_unlock_irqrestore(&auditd_conn_lock, flags);`
			`+ return;`
			`+ }`
			`rcu_assign_pointer(auditd_conn, NULL);`
			`spin_unlock_irqrestore(&auditd_conn_lock, flags);`

			`@@ -649,8 +658,8 @@ static int auditd_send_unicast_skb(struct sk_buff *skb)`
			`return rc;`

			`err:`
			`- if (rc == -ECONNREFUSED)`
			`- auditd_reset();`
			`+ if (ac && rc == -ECONNREFUSED)`
			`+ auditd_reset(ac);`
			`return rc;`
			`}`

			`@@ -795,9 +804,9 @@ static int kauditd_thread(void *dummy)`
			`rc = kauditd_send_queue(sk, portid,`
			`&audit_hold_queue, UNICAST_RETRIES,`
			`NULL, kauditd_rehold_skb);`
			`- if (rc < 0) {`
			`+ if (ac && rc < 0) {`
			`sk = NULL;`
			`- auditd_reset();`
			`+ auditd_reset(ac);`
			`goto main_queue;`
			`}`

			`@@ -805,9 +814,9 @@ static int kauditd_thread(void *dummy)`
			`rc = kauditd_send_queue(sk, portid,`
			`&audit_retry_queue, UNICAST_RETRIES,`
			`NULL, kauditd_hold_skb);`
			`- if (rc < 0) {`
			`+ if (ac && rc < 0) {`
			`sk = NULL;`
			`- auditd_reset();`
			`+ auditd_reset(ac);`
			`goto main_queue;`
			`}`

			`@@ -815,12 +824,13 @@ static int kauditd_thread(void *dummy)`
			`/* process the main queue - do the multicast send and attempt`
			`* unicast, dump failed record sends to the retry queue; if`
			`* sk == NULL due to previous failures we will just do the`
			`- * multicast send and move the record to the retry queue */`
			`+ * multicast send and move the record to the hold queue */`
			`rc = kauditd_send_queue(sk, portid, &audit_queue, 1,`
			`kauditd_send_multicast_skb,`
			`- kauditd_retry_skb);`
			`- if (sk == NULL \|\| rc < 0)`
			`- auditd_reset();`
			`+ (sk ?`
			`+ kauditd_retry_skb : kauditd_hold_skb));`
			`+ if (ac && rc < 0)`
			`+ auditd_reset(ac);`
			`sk = NULL;`

			`/* drop our netns reference, no auditd sends past this line */`
			`@@ -1230,7 +1240,7 @@ static int audit_receive_msg(struct sk_buff skb, struct nlmsghdr nlh)`
			`auditd_pid, 1);`

			`/* unregister the auditd connection */`
			`- auditd_reset();`
			`+ auditd_reset(NULL);`
			`}`
			`}`
			`if (s.mask & AUDIT_STATUS_RATE_LIMIT) {`