erlang/otp-0011-Improve-nodes-querying.patch
Peter Lemenkov 54bf40d43a Fix issue with nodes info querying
Signed-off-by: Peter Lemenkov <lemenkov@gmail.com>
2019-01-15 18:38:56 +03:00

135 lines
4.5 KiB
Diff

From: Peter Lemenkov <lemenkov@gmail.com>
Date: Wed, 24 Oct 2018 14:58:41 +0200
Subject: [PATCH] Improve nodes querying
We've got a few similar stacktraces once. See the following one for
example:
** Reason for termination ==
** {badarg,
[{ets,next,[sys_dist,'rabbitmq-cli-42@host.example.com'],[]},
{net_kernel,get_nodes,2,[{file,"net_kernel.erl"},{line,1025}]},
{net_kernel,get_nodes,2,[{file,"net_kernel.erl"},{line,1019}]},
{net_kernel,get_nodes_info,0,[{file,"net_kernel.erl"},{line,1439}]},
{rabbit_mgmt_external_stats,cluster_links,0,
[{file,"src/rabbit_mgmt_external_stats.erl"},{line,252}]},
{rabbit_mgmt_external_stats,emit_node_node_stats,1,
[{file,"src/rabbit_mgmt_external_stats.erl"},{line,366}]},
{rabbit_mgmt_external_stats,handle_info,2,
[{file,"src/rabbit_mgmt_external_stats.erl"},{line,347}]},
{gen_server,try_dispatch,4,[{file,"gen_server.erl"},{line,615}]}]}
The problem is that when we're trying to query a list of connected
nodes, we're doing it in the following way:
Call for the first record in ETS
While not EOF:
Call for the next record in ETS
What happens, when some Node disconnects during the "not EOF" loop?
We'll get an exception.
Let's do it differently - query a list of nodes in one shot, and then
get info from each of the nodes in list (w/o extra calls to ets). These
individual calls care of disconnected nodes so everything will be fine
even if a node disconnects.
Signed-off-by: Peter Lemenkov <lemenkov@gmail.com>
diff --git a/lib/kernel/src/net_kernel.erl b/lib/kernel/src/net_kernel.erl
index 7da89dd7cb..f57324b67f 100644
--- a/lib/kernel/src/net_kernel.erl
+++ b/lib/kernel/src/net_kernel.erl
@@ -609,24 +609,16 @@ code_change(_OldVsn, State, _Extra) ->
terminate(no_network, State) ->
lists:foreach(
- fun({Node, Type}) ->
- case Type of
- normal -> ?nodedown(Node, State);
- _ -> ok
- end
- end, get_up_nodes() ++ [{node(), normal}]);
+ fun(Node) -> ?nodedown(Node, State)
+ end, get_nodes_up_normal() ++ [node()]);
terminate(_Reason, State) ->
lists:foreach(
fun(#listen {listen = Listen,module = Mod}) ->
Mod:close(Listen)
end, State#state.listen),
lists:foreach(
- fun({Node, Type}) ->
- case Type of
- normal -> ?nodedown(Node, State);
- _ -> ok
- end
- end, get_up_nodes() ++ [{node(), normal}]).
+ fun(Node) -> ?nodedown(Node, State)
+ end, get_nodes_up_normal() ++ [node()]).
%% ------------------------------------------------------------
@@ -1045,35 +1037,10 @@ disconnect_pid(Pid, State) ->
%%
%%
%%
-get_nodes(Which) ->
- get_nodes(ets:first(sys_dist), Which).
-get_nodes('$end_of_table', _) ->
- [];
-get_nodes(Key, Which) ->
- case ets:lookup(sys_dist, Key) of
- [Conn = #connection{state = up}] ->
- [Conn#connection.node | get_nodes(ets:next(sys_dist, Key),
- Which)];
- [Conn = #connection{}] when Which =:= all ->
- [Conn#connection.node | get_nodes(ets:next(sys_dist, Key),
- Which)];
- _ ->
- get_nodes(ets:next(sys_dist, Key), Which)
- end.
-
-%% Return a list of all nodes that are 'up'.
-get_up_nodes() ->
- get_up_nodes(ets:first(sys_dist)).
-
-get_up_nodes('$end_of_table') -> [];
-get_up_nodes(Key) ->
- case ets:lookup(sys_dist, Key) of
- [#connection{state=up,node=Node,type=Type}] ->
- [{Node,Type}|get_up_nodes(ets:next(sys_dist, Key))];
- _ ->
- get_up_nodes(ets:next(sys_dist, Key))
- end.
+%% Return a list of all nodes that are 'up' and not hidden.
+get_nodes_up_normal() ->
+ ets:select(sys_dist, [{#connection{node = '$1', state = up, type = normal, _ = '_'}, [], ['$1']}]).
ticker(Kernel, Tick) when is_integer(Tick) ->
process_flag(priority, max),
@@ -1525,15 +1492,14 @@ get_node_info(Node, Key) ->
end.
get_nodes_info() ->
- get_nodes_info(get_nodes(all), []).
-
-get_nodes_info([Node|Nodes], InfoList) ->
- case get_node_info(Node) of
- {ok, Info} -> get_nodes_info(Nodes, [{Node, Info}|InfoList]);
- _ -> get_nodes_info(Nodes, InfoList)
- end;
-get_nodes_info([], InfoList) ->
- {ok, InfoList}.
+ Nodes = ets:select(sys_dist, [{#connection{node = '$1', _ = '_'}, [], ['$1']}]),
+ {ok, lists:filtermap(
+ fun(Node) ->
+ case get_node_info(Node) of
+ {ok, Info} -> {true, {Node, Info}};
+ _ -> false
+ end
+ end, Nodes)}.
%% ------------------------------------------------------------
%% Misc. functions