423 lines
14 KiB
Diff
423 lines
14 KiB
Diff
|
From 49c4172f4eef6e2015615e132b199a7ec0699ffc Mon Sep 17 00:00:00 2001
|
||
|
From: Kairui Song <kasong@redhat.com>
|
||
|
Date: Wed, 8 Apr 2020 16:39:52 +0800
|
||
|
Subject: [PATCH] Remove memtrace-ko and rd.memdebug=4 support in dracut
|
||
|
|
||
|
This feature can be offloaded to memstrack, which has better
|
||
|
accuracy, better performance, and more detailed tracing features.
|
||
|
|
||
|
Also simplify make_trace_mem a bit.
|
||
|
|
||
|
Also, rd.memdebug=4 is currently unstable and fails from time to time.
|
||
|
---
|
||
|
dracut.cmdline.7.asc | 4 +--
|
||
|
modules.d/98dracut-systemd/dracut-cmdline.sh | 2 +-
|
||
|
modules.d/98dracut-systemd/dracut-pre-mount.sh | 2 +-
|
||
|
modules.d/98dracut-systemd/dracut-pre-pivot.sh | 2 +-
|
||
|
modules.d/98dracut-systemd/dracut-pre-trigger.sh | 2 +-
|
||
|
modules.d/99base/dracut-lib.sh | 44 +++++-------------------
|
||
|
modules.d/99base/init.sh | 8 ++---
|
||
|
modules.d/99base/memtrace-ko.sh | 191 -----------------------------------------------------------------------------------------------------
|
||
|
modules.d/99base/module-setup.sh | 1 -
|
||
|
9 files changed, 18 insertions(+), 238 deletions(-)
|
||
|
|
||
|
diff --git a/dracut.cmdline.7.asc b/dracut.cmdline.7.asc
|
||
|
index 2b38aa33..c53601ea 100644
|
||
|
--- a/dracut.cmdline.7.asc
|
||
|
+++ b/dracut.cmdline.7.asc
|
||
|
@@ -188,9 +188,9 @@ It should be attached to any report about dracut problems.
|
||
|
_/run/initramfs/init.log_.
|
||
|
If "quiet" is set, it also logs to the console.
|
||
|
|
||
|
-**rd.memdebug=[0-4]**::
|
||
|
+**rd.memdebug=[0-3]**::
|
||
|
Print memory usage info at various points, set the verbose level from 0 to 4.
|
||
|
-+
|
||
|
++
|
||
|
Higher level means more debugging output:
|
||
|
+
|
||
|
----
|
||
|
diff --git a/modules.d/98dracut-systemd/dracut-cmdline.sh b/modules.d/98dracut-systemd/dracut-cmdline.sh
|
||
|
index bff9435a..6c6ee026 100755
|
||
|
--- a/modules.d/98dracut-systemd/dracut-cmdline.sh
|
||
|
+++ b/modules.d/98dracut-systemd/dracut-cmdline.sh
|
||
|
@@ -42,7 +42,7 @@ export root
|
||
|
export rflags
|
||
|
export fstype
|
||
|
|
||
|
-make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem'
|
||
|
+make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab'
|
||
|
# run scriptlets to parse the command line
|
||
|
getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline"
|
||
|
source_hook cmdline
|
||
|
diff --git a/modules.d/98dracut-systemd/dracut-pre-mount.sh b/modules.d/98dracut-systemd/dracut-pre-mount.sh
|
||
|
index a3b9d291..ae511286 100755
|
||
|
--- a/modules.d/98dracut-systemd/dracut-pre-mount.sh
|
||
|
+++ b/modules.d/98dracut-systemd/dracut-pre-mount.sh
|
||
|
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
|
||
|
|
||
|
source_conf /etc/conf.d
|
||
|
|
||
|
-make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
|
||
|
+make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab'
|
||
|
# pre pivot scripts are sourced just before we doing cleanup and switch over
|
||
|
# to the new root.
|
||
|
getarg 'rd.break=pre-mount' 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount"
|
||
|
diff --git a/modules.d/98dracut-systemd/dracut-pre-pivot.sh b/modules.d/98dracut-systemd/dracut-pre-pivot.sh
|
||
|
index dc9a2504..96e6f8ca 100755
|
||
|
--- a/modules.d/98dracut-systemd/dracut-pre-pivot.sh
|
||
|
+++ b/modules.d/98dracut-systemd/dracut-pre-pivot.sh
|
||
|
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
|
||
|
|
||
|
source_conf /etc/conf.d
|
||
|
|
||
|
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
|
||
|
+make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab'
|
||
|
# pre pivot scripts are sourced just before we doing cleanup and switch over
|
||
|
# to the new root.
|
||
|
getarg 'rd.break=pre-pivot' 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot"
|
||
|
diff --git a/modules.d/98dracut-systemd/dracut-pre-trigger.sh b/modules.d/98dracut-systemd/dracut-pre-trigger.sh
|
||
|
index 7cd821ed..a1a33960 100755
|
||
|
--- a/modules.d/98dracut-systemd/dracut-pre-trigger.sh
|
||
|
+++ b/modules.d/98dracut-systemd/dracut-pre-trigger.sh
|
||
|
@@ -8,7 +8,7 @@ type getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh
|
||
|
|
||
|
source_conf /etc/conf.d
|
||
|
|
||
|
-make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
|
||
|
+make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab'
|
||
|
|
||
|
source_hook pre-trigger
|
||
|
|
||
|
diff --git a/modules.d/99base/dracut-lib.sh b/modules.d/99base/dracut-lib.sh
|
||
|
index c57523d3..b7020892 100755
|
||
|
--- a/modules.d/99base/dracut-lib.sh
|
||
|
+++ b/modules.d/99base/dracut-lib.sh
|
||
|
@@ -1187,50 +1187,25 @@ are_lists_eq() {
|
||
|
|
||
|
setmemdebug() {
|
||
|
if [ -z "$DEBUG_MEM_LEVEL" ]; then
|
||
|
- export DEBUG_MEM_LEVEL=$(getargnum 0 0 4 rd.memdebug)
|
||
|
+ export DEBUG_MEM_LEVEL=$(getargnum 0 0 3 rd.memdebug)
|
||
|
fi
|
||
|
}
|
||
|
|
||
|
setmemdebug
|
||
|
|
||
|
-cleanup_trace_mem()
|
||
|
-{
|
||
|
- # tracekomem based on kernel trace needs cleanup after use.
|
||
|
- if [ "$DEBUG_MEM_LEVEL" -eq 4 ]; then
|
||
|
- tracekomem --cleanup
|
||
|
- fi
|
||
|
-}
|
||
|
-
|
||
|
-# parameters: msg [trace_level:trace]...
|
||
|
-make_trace_mem()
|
||
|
-{
|
||
|
- local msg
|
||
|
- msg="$1"
|
||
|
- shift
|
||
|
- if [ -n "$DEBUG_MEM_LEVEL" ] && [ "$DEBUG_MEM_LEVEL" -gt 0 ]; then
|
||
|
- make_trace show_memstats $DEBUG_MEM_LEVEL "[debug_mem]" "$msg" "$@" >&2
|
||
|
- fi
|
||
|
-}
|
||
|
-
|
||
|
# parameters: func log_level prefix msg [trace_level:trace]...
|
||
|
-make_trace()
|
||
|
+make_trace_mem()
|
||
|
{
|
||
|
- local func log_level prefix msg msg_printed
|
||
|
+ local log_level prefix msg msg_printed
|
||
|
local trace trace_level trace_in_higher_levels insert_trace
|
||
|
|
||
|
- func=$1
|
||
|
- shift
|
||
|
-
|
||
|
- log_level=$1
|
||
|
- shift
|
||
|
-
|
||
|
- prefix=$1
|
||
|
- shift
|
||
|
-
|
||
|
msg=$1
|
||
|
shift
|
||
|
|
||
|
- if [ -z "$log_level" ]; then
|
||
|
+ prefix='[debug_mem]'
|
||
|
+ log_level=$DEBUG_MEM_LEVEL
|
||
|
+
|
||
|
+ if [ -z "$log_level" ] || [ "$log_level" -le 0 ]; then
|
||
|
return
|
||
|
fi
|
||
|
|
||
|
@@ -1263,7 +1238,7 @@ make_trace()
|
||
|
echo "$prefix $msg"
|
||
|
msg_printed=1
|
||
|
fi
|
||
|
- $func $trace
|
||
|
+ show_memstats $trace
|
||
|
fi
|
||
|
shift
|
||
|
done
|
||
|
@@ -1285,9 +1260,6 @@ show_memstats()
|
||
|
iomem)
|
||
|
cat /proc/iomem
|
||
|
;;
|
||
|
- komem)
|
||
|
- tracekomem
|
||
|
- ;;
|
||
|
esac
|
||
|
}
|
||
|
|
||
|
diff --git a/modules.d/99base/init.sh b/modules.d/99base/init.sh
|
||
|
index 1111d09b..148ce831 100755
|
||
|
--- a/modules.d/99base/init.sh
|
||
|
+++ b/modules.d/99base/init.sh
|
||
|
@@ -131,7 +131,7 @@ if ! getargbool 1 'rd.hostonly'; then
|
||
|
fi
|
||
|
|
||
|
# run scriptlets to parse the command line
|
||
|
-make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab' '4+:komem'
|
||
|
+make_trace_mem "hook cmdline" '1+:mem' '1+:iomem' '3+:slab'
|
||
|
getarg 'rd.break=cmdline' -d 'rdbreak=cmdline' && emergency_shell -n cmdline "Break before cmdline"
|
||
|
source_hook cmdline
|
||
|
|
||
|
@@ -160,7 +160,7 @@ fi
|
||
|
|
||
|
udevproperty "hookdir=$hookdir"
|
||
|
|
||
|
-make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
|
||
|
+make_trace_mem "hook pre-trigger" '1:shortmem' '2+:mem' '3+:slab'
|
||
|
getarg 'rd.break=pre-trigger' -d 'rdbreak=pre-trigger' && emergency_shell -n pre-trigger "Break before pre-trigger"
|
||
|
source_hook pre-trigger
|
||
|
|
||
|
@@ -230,7 +230,7 @@ unset RDRETRY
|
||
|
|
||
|
# pre-mount happens before we try to mount the root filesystem,
|
||
|
# and happens once.
|
||
|
-make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
|
||
|
+make_trace_mem "hook pre-mount" '1:shortmem' '2+:mem' '3+:slab'
|
||
|
getarg 'rd.break=pre-mount' -d 'rdbreak=pre-mount' && emergency_shell -n pre-mount "Break pre-mount"
|
||
|
source_hook pre-mount
|
||
|
|
||
|
@@ -266,7 +266,7 @@ done
|
||
|
|
||
|
# pre pivot scripts are sourced just before we doing cleanup and switch over
|
||
|
# to the new root.
|
||
|
-make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab' '4+:komem'
|
||
|
+make_trace_mem "hook pre-pivot" '1:shortmem' '2+:mem' '3+:slab'
|
||
|
getarg 'rd.break=pre-pivot' -d 'rdbreak=pre-pivot' && emergency_shell -n pre-pivot "Break pre-pivot"
|
||
|
source_hook pre-pivot
|
||
|
|
||
|
diff --git a/modules.d/99base/memtrace-ko.sh b/modules.d/99base/memtrace-ko.sh
|
||
|
deleted file mode 100755
|
||
|
index ee035e15..00000000
|
||
|
--- a/modules.d/99base/memtrace-ko.sh
|
||
|
+++ /dev/null
|
||
|
@@ -1,191 +0,0 @@
|
||
|
-#!/bin/sh
|
||
|
-
|
||
|
-# Try to find out kernel modules with large total memory allocation during loading.
|
||
|
-# For large slab allocation, it will fall into buddy, also not trace "mm_page_free"
|
||
|
-# considering large free is quite rare for module_init, thus saving tons of events
|
||
|
-# to avoid trace data overwritten.
|
||
|
-#
|
||
|
-# Therefore, tracing "mm_page_alloc"alone should be enough for the purpose.
|
||
|
-
|
||
|
-# "sys/kernel/tracing" has the priority if exists.
|
||
|
-get_trace_base() {
|
||
|
- # trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available.
|
||
|
- if [ -d "/sys/kernel/tracing" ]; then
|
||
|
- echo "/sys/kernel"
|
||
|
- else
|
||
|
- echo "/sys/kernel/debug"
|
||
|
- fi
|
||
|
-}
|
||
|
-
|
||
|
-# We want to enable these trace events.
|
||
|
-get_want_events() {
|
||
|
- echo "module:module_put module:module_load kmem:mm_page_alloc"
|
||
|
-}
|
||
|
-
|
||
|
-get_event_filter() {
|
||
|
- echo "comm == systemd-udevd || comm == modprobe || comm == insmod"
|
||
|
-}
|
||
|
-
|
||
|
-is_trace_ready() {
|
||
|
- local trace_base want_events current_events
|
||
|
-
|
||
|
- trace_base=$(get_trace_base)
|
||
|
- ! [ -f "$trace_base/tracing/trace" ] && return 1
|
||
|
-
|
||
|
- [ "$(cat $trace_base/tracing/tracing_on)" -eq 0 ] && return 1
|
||
|
-
|
||
|
- # Also check if trace events were properly setup.
|
||
|
- want_events=$(get_want_events)
|
||
|
- current_events=$(echo $(cat $trace_base/tracing/set_event))
|
||
|
- [ "$current_events" != "$want_events" ] && return 1
|
||
|
-
|
||
|
- return 0
|
||
|
-}
|
||
|
-
|
||
|
-prepare_trace() {
|
||
|
- local trace_base
|
||
|
-
|
||
|
- trace_base=$(get_trace_base)
|
||
|
- # old debugfs interface case.
|
||
|
- if ! [ -d "$trace_base/tracing" ]; then
|
||
|
- mount none -t debugfs $trace_base
|
||
|
- # new tracefs interface case.
|
||
|
- elif ! [ -f "$trace_base/tracing/trace" ]; then
|
||
|
- mount none -t tracefs "$trace_base/tracing"
|
||
|
- fi
|
||
|
-
|
||
|
- if ! [ -f "$trace_base/tracing/trace" ]; then
|
||
|
- echo "WARN: Mount trace failed for kernel module memory analyzing."
|
||
|
- return 1
|
||
|
- fi
|
||
|
-
|
||
|
- # Active all the wanted trace events.
|
||
|
- echo "$(get_want_events)" > $trace_base/tracing/set_event
|
||
|
-
|
||
|
- # There are three kinds of known applications for module loading:
|
||
|
- # "systemd-udevd", "modprobe" and "insmod".
|
||
|
- # Set them as the global events filter.
|
||
|
- # NOTE: Some kernel may not support this format of filter, anyway
|
||
|
- # the operation will fail and it doesn't matter.
|
||
|
- echo "$(get_event_filter)" > $trace_base/tracing/events/kmem/filter 2>&1
|
||
|
- echo "$(get_event_filter)" > $trace_base/tracing/events/module/filter 2>&1
|
||
|
-
|
||
|
- # Set the number of comm-pid if supported.
|
||
|
- if [ -f "$trace_base/tracing/saved_cmdlines_size" ]; then
|
||
|
- # Thanks to filters, 4096 is big enough(also well supported).
|
||
|
- echo 4096 > $trace_base/tracing/saved_cmdlines_size
|
||
|
- fi
|
||
|
-
|
||
|
- # Enable and clear trace data for the first time.
|
||
|
- echo 1 > $trace_base/tracing/tracing_on
|
||
|
- echo > $trace_base/tracing/trace
|
||
|
- echo "Prepare trace success."
|
||
|
- return 0
|
||
|
-}
|
||
|
-
|
||
|
-order_to_pages()
|
||
|
-{
|
||
|
- local pages=1
|
||
|
- local order=$1
|
||
|
-
|
||
|
- while [ "$order" != 0 ]; do
|
||
|
- order=$((order-1))
|
||
|
- pages=$(($pages*2))
|
||
|
- done
|
||
|
-
|
||
|
- echo $pages
|
||
|
-}
|
||
|
-
|
||
|
-parse_trace_data() {
|
||
|
- local module_name tmp_eval pages
|
||
|
-
|
||
|
- cat "$(get_trace_base)/tracing/trace" | while read pid cpu flags ts function args
|
||
|
- do
|
||
|
- # Skip comment lines
|
||
|
- if [ "$pid" = "#" ]; then
|
||
|
- continue
|
||
|
- fi
|
||
|
-
|
||
|
- pid=${pid##*-}
|
||
|
- function=${function%:}
|
||
|
- if [ "$function" = "module_load" ]; then
|
||
|
- # One module is being loaded, save the task pid for tracking.
|
||
|
- # Remove the trailing after whitespace, there may be the module flags.
|
||
|
- module_name=${args%% *}
|
||
|
- # Mark current_module to track the task.
|
||
|
- eval current_module_$pid="$module_name"
|
||
|
- tmp_eval=$(eval echo '${module_loaded_'${module_name}'}')
|
||
|
- if [ -n "$tmp_eval" ]; then
|
||
|
- echo "WARN: \"$module_name\" was loaded multiple times!"
|
||
|
- fi
|
||
|
- eval unset module_loaded_$module_name
|
||
|
- eval nr_alloc_pages_$module_name=0
|
||
|
- continue
|
||
|
- fi
|
||
|
-
|
||
|
- module_name=$(eval echo '${current_module_'${pid}'}')
|
||
|
- if [ -z "$module_name" ]; then
|
||
|
- continue
|
||
|
- fi
|
||
|
-
|
||
|
- # Once we get here, the task is being tracked(is loading a module).
|
||
|
- if [ "$function" = "module_put" ]; then
|
||
|
- # Mark the module as loaded when the first module_put event happens after module_load.
|
||
|
- tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
|
||
|
- echo "$tmp_eval pages consumed by \"$module_name\""
|
||
|
- eval module_loaded_$module_name=1
|
||
|
- # Module loading finished, so untrack the task.
|
||
|
- eval unset current_module_$pid
|
||
|
- eval unset nr_alloc_pages_$module_name
|
||
|
- continue
|
||
|
- fi
|
||
|
-
|
||
|
- if [ "$function" = "mm_page_alloc" ]; then
|
||
|
- # Get order first, then convert to actual pages.
|
||
|
- pages=$(echo $args | sed -e 's/.*order=\([0-9]*\) .*/\1/')
|
||
|
- pages=$(order_to_pages "$pages")
|
||
|
- tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
|
||
|
- eval nr_alloc_pages_$module_name="$(($tmp_eval+$pages))"
|
||
|
- fi
|
||
|
- done
|
||
|
-}
|
||
|
-
|
||
|
-cleanup_trace() {
|
||
|
- local trace_base
|
||
|
-
|
||
|
- if is_trace_ready; then
|
||
|
- trace_base=$(get_trace_base)
|
||
|
- echo 0 > $trace_base/tracing/tracing_on
|
||
|
- echo > $trace_base/tracing/trace
|
||
|
- echo > $trace_base/tracing/set_event
|
||
|
- echo 0 > $trace_base/tracing/events/kmem/filter
|
||
|
- echo 0 > $trace_base/tracing/events/module/filter
|
||
|
- fi
|
||
|
-}
|
||
|
-
|
||
|
-show_usage() {
|
||
|
- echo "Find out kernel modules with large memory consumption during loading based on trace."
|
||
|
- echo "Usage:"
|
||
|
- echo "1) run it first to setup trace."
|
||
|
- echo "2) run again to parse the trace data if any."
|
||
|
- echo "3) run with \"--cleanup\" option to cleanup trace after use."
|
||
|
-}
|
||
|
-
|
||
|
-if [ "$1" = "--help" ]; then
|
||
|
- show_usage
|
||
|
- exit 0
|
||
|
-fi
|
||
|
-
|
||
|
-if [ "$1" = "--cleanup" ]; then
|
||
|
- cleanup_trace
|
||
|
- exit 0
|
||
|
-fi
|
||
|
-
|
||
|
-if is_trace_ready ; then
|
||
|
- echo "tracekomem - Rough memory consumption by loading kernel modules (larger value with better accuracy)"
|
||
|
- parse_trace_data
|
||
|
-else
|
||
|
- prepare_trace
|
||
|
-fi
|
||
|
-
|
||
|
-exit $?
|
||
|
diff --git a/modules.d/99base/module-setup.sh b/modules.d/99base/module-setup.sh
|
||
|
index 3a2246b4..c9e3ebcb 100755
|
||
|
--- a/modules.d/99base/module-setup.sh
|
||
|
+++ b/modules.d/99base/module-setup.sh
|
||
|
@@ -39,7 +39,6 @@ install() {
|
||
|
inst_script "$moddir/initqueue.sh" "/sbin/initqueue"
|
||
|
inst_script "$moddir/loginit.sh" "/sbin/loginit"
|
||
|
inst_script "$moddir/rdsosreport.sh" "/sbin/rdsosreport"
|
||
|
- inst_script "$moddir/memtrace-ko.sh" "/sbin/tracekomem"
|
||
|
|
||
|
[ -e "${initdir}/lib" ] || mkdir -m 0755 -p ${initdir}/lib
|
||
|
mkdir -m 0755 -p ${initdir}/lib/dracut
|
||
|
|