Backport parallel debuginfo processing.

This commit is contained in:
Mark Wielaard 2017-06-23 16:24:36 +02:00
parent 117a783617
commit a11f229741
3 changed files with 221 additions and 1 deletions

View File

@ -0,0 +1,113 @@
commit 1b338aa84d4c67fefa957352a028eaca1a45d1f6
Author: Michal Marek <mmarek@suse.com>
Date: Sat Sep 10 23:13:25 2016 +0200
find-debuginfo.sh: Process files in parallel
Add a -j <n> option, which, when used, will spawn <n> processes to do the
debuginfo extraction in parallel. A pipe is used to dispatch the files among
the processes.
Signed-off-by: Michal Marek <mmarek@suse.com>
diff --git a/macros.in b/macros.in
index b03c5a9..8bde2d7 100644
--- a/macros.in
+++ b/macros.in
@@ -180,7 +180,7 @@
# the script. See the script for details.
#
%__debug_install_post \
- %{_rpmconfigdir}/find-debuginfo.sh %{?_missing_build_ids_terminate_build:--strict-build-id} %{?_no_recompute_build_ids:-n} %{?_include_minidebuginfo:-m} %{?_include_gdb_index:-i} %{?_unique_build_ids:--ver-rel "%{VERSION}-%{RELEASE}"} %{?_unique_debug_names:--unique-debug-arch "%{_arch}"} %{?_unique_debug_srcs:--unique-debug-src-base "%{name}"} %{?_find_debuginfo_dwz_opts} %{?_find_debuginfo_opts} "%{_builddir}/%{?buildsubdir}"\
+ %{_rpmconfigdir}/find-debuginfo.sh %{?_smp_mflags} %{?_missing_build_ids_terminate_build:--strict-build-id} %{?_no_recompute_build_ids:-n} %{?_include_minidebuginfo:-m} %{?_include_gdb_index:-i} %{?_unique_build_ids:--ver-rel "%{VERSION}-%{RELEASE}"} %{?_unique_debug_names:--unique-debug-arch "%{_arch}"} %{?_unique_debug_srcs:--unique-debug-src-base "%{name}"} %{?_find_debuginfo_dwz_opts} %{?_find_debuginfo_opts} "%{_builddir}/%{?buildsubdir}"\
%{nil}
# Template for debug information sub-package.
diff --git a/scripts/find-debuginfo.sh b/scripts/find-debuginfo.sh
index 6dcd5a4..2016222 100644
--- a/scripts/find-debuginfo.sh
+++ b/scripts/find-debuginfo.sh
@@ -74,6 +74,9 @@
# Base given by --unique-debug-src-base
unique_debug_src_base=
+# Number of parallel jobs to spawn
+n_jobs=1
+
BUILDDIR=.
out=debugfiles.list
nout=0
@@ -137,6 +140,13 @@
-r)
strip_r=true
;;
+ -j)
+ n_jobs=$2
+ shift
+ ;;
+ -j*)
+ n_jobs=${1#-j}
+ ;;
*)
BUILDDIR=$1
shift
@@ -389,9 +399,56 @@
fi
}
-while read nlinks inum f; do
- do_file "$nlinks" "$inum" "$f"
-done <"$temp/primary"
+# 16^6 - 1 or about 16 million files
+FILENUM_DIGITS=6
+# Worker for parallel mode: reads hexadecimal line numbers of "$temp/primary"
+# from stdin, one fixed-width record at a time, and runs do_file on the
+# corresponding entry until stdin is exhausted.
+#   $1 - job id; used as the suffix of this worker's debugsources.<id>,
+#        elfbins.<id> and res.<id> files under "$temp"
+# Writes "0" to "$temp/res.<id>" once all of its entries have been processed.
+run_job()
+{
+  local jobid=$1 filenum nlinks inum f
+  local SOURCEFILE=$temp/debugsources.$jobid ELFBINSFILE=$temp/elfbins.$jobid
+
+  >"$SOURCEFILE"
+  >"$ELFBINSFILE"
+  # can't use read -n <n>, because it reads bytes one by one, allowing for
+  # races
+  while :; do
+    filenum=$(dd bs=$(( FILENUM_DIGITS + 1 )) count=1 status=none)
+    if test -z "$filenum"; then
+      break
+    fi
+    # Split the entry with read, exactly like the serial loop does, so that
+    # a file name containing whitespace or glob characters is passed to
+    # do_file as one argument instead of being word-split.
+    read nlinks inum f < <(sed -n "$(( 0x$filenum )) p" "$temp/primary")
+    do_file "$nlinks" "$inum" "$f"
+  done
+  echo 0 >"$temp/res.$jobid"
+}
+
+# Process every entry of "$temp/primary", either serially or by feeding the
+# entry numbers through a pipe shared by n_jobs background workers.
+n_files=$(wc -l <"$temp/primary")
+if [ $n_jobs -gt $n_files ]; then
+  n_jobs=$n_files
+fi
+if [ $n_jobs -le 1 ]; then
+  while read nlinks inum f; do
+    do_file "$nlinks" "$inum" "$f"
+  done <"$temp/primary"
+else
+  for ((i = 1; i <= n_files; i++)); do
+    printf "%0${FILENUM_DIGITS}x\\n" $i
+  done | (
+    exec 3<&0
+    for ((i = 0; i < n_jobs; i++)); do
+      # The shell redirects stdin to /dev/null for background jobs. Work
+      # around this by duplicating fd 0
+      run_job $i <&3 &
+    done
+    wait
+  )
+  # A worker that terminates early (do_file calls exit on errors) never
+  # writes its res file, so check every job id rather than only the res
+  # files that happen to exist; a missing file counts as a failure.
+  for ((i = 0; i < n_jobs; i++)); do
+    res=
+    if [ -f "$temp/res.$i" ]; then
+      res=$(< "$temp/res.$i")
+    fi
+    if [ "$res" != "0" ]; then
+      exit 1
+    fi
+  done
+  cat "$temp"/debugsources.* >"$SOURCEFILE"
+  cat "$temp"/elfbins.* >"$ELFBINSFILE"
+fi
# Invoke the DWARF Compressor utility.
if $run_dwz \

View File

@ -0,0 +1,100 @@
commit 038bfe01796f751001e02de41c5d8678f511f366
Author: Michal Marek <mmarek@suse.com>
Date: Sat Sep 10 23:13:24 2016 +0200
find-debuginfo.sh: Split directory traversal and debuginfo extraction
This simplifies the handling of hardlinks a bit and allows a later patch
to parallelize the debuginfo extraction.
Signed-off-by: Michal Marek <mmarek@suse.com>
diff --git a/scripts/find-debuginfo.sh b/scripts/find-debuginfo.sh
index d83c3e2..6dcd5a4 100644
--- a/scripts/find-debuginfo.sh
+++ b/scripts/find-debuginfo.sh
@@ -283,32 +283,36 @@
strict_error=ERROR
$strict || strict_error=WARNING
-# Strip ELF binaries
+temp=$(mktemp -d ${TMPDIR:-/tmp}/find-debuginfo.XXXXXX)
+trap 'rm -rf "$temp"' EXIT
+
+# Build a list of unstripped ELF files and their hardlinks
+touch "$temp/primary"
find "$RPM_BUILD_ROOT" ! -path "${debugdir}/*.debug" -type f \
\( -perm -0100 -or -perm -0010 -or -perm -0001 \) \
-print |
file -N -f - | sed -n -e 's/^\(.*\):[ ]*.*ELF.*, not stripped.*/\1/p' |
xargs --no-run-if-empty stat -c '%h %D_%i %n' |
while read nlinks inum f; do
- get_debugfn "$f"
- [ -f "${debugfn}" ] && continue
-
- # If this file has multiple links, keep track and make
- # the corresponding .debug files all links to one file too.
if [ $nlinks -gt 1 ]; then
- eval linked=\$linked_$inum
- if [ -n "$linked" ]; then
- eval id=\$linkedid_$inum
- link=$debugfn
- get_debugfn "$linked"
- echo "hard linked $link to $debugfn"
- mkdir -p "$(dirname "$link")" && ln -nf "$debugfn" "$link"
+ var=seen_$inum
+ if test -n "${!var}"; then
+ echo "$inum $f" >>"$temp/linked"
continue
else
- eval linked_$inum=\$f
- echo "file $f has $[$nlinks - 1] other hard links"
+ read "$var" < <(echo 1)
fi
fi
+ echo "$nlinks $inum $f" >>"$temp/primary"
+done
+
+# Strip ELF binaries
+do_file()
+{
+ local nlinks=$1 inum=$2 f=$3 id link linked
+
+ get_debugfn "$f"
+ [ -f "${debugfn}" ] && return
echo "extracting debug info from $f"
build_id_seed=
@@ -328,9 +332,6 @@
fi
id=$(${lib_rpm_dir}/debugedit -b $debug_base_name -d $debug_dest_name \
$no_recompute -i $build_id_seed -l "$SOURCEFILE" "$f") || exit
- if [ $nlinks -gt 1 ]; then
- eval linkedid_$inum=\$id
- fi
if [ -z "$id" ]; then
echo >&2 "*** ${strict_error}: No build ID note found in $f"
$strict && exit 2
@@ -376,7 +377,21 @@
echo "./${f#$RPM_BUILD_ROOT}" >> "$ELFBINSFILE"
-done || exit
+ # If this file has multiple links, make the corresponding .debug files
+ # all links to one file too.
+ if [ $nlinks -gt 1 ]; then
+ grep "^$inum " "$temp/linked" | while read inum linked; do
+ link=$debugfn
+ get_debugfn "$linked"
+ echo "hard linked $link to $debugfn"
+ mkdir -p "$(dirname "$debugfn")" && ln -nf "$link" "$debugfn"
+ done
+ fi
+}
+
+while read nlinks inum f; do
+ do_file "$nlinks" "$inum" "$f"
+done <"$temp/primary"
# Invoke the DWARF Compressor utility.
if $run_dwz \

View File

@ -33,7 +33,7 @@
Summary: The RPM package management system
Name: rpm
Version: %{rpmver}
Release: %{?snapver:0.%{snapver}.}23%{?dist}
Release: %{?snapver:0.%{snapver}.}24%{?dist}
Group: System Environment/Base
Url: http://www.rpm.org/
Source0: http://ftp.rpm.org/releases/%{srcdir}/%{name}-%{srcver}.tar.bz2
@ -93,6 +93,10 @@ Patch274: 0025-buildid-reset-attrs.patch
# World writable empty (tmp) dirs in debuginfo packages (#641022)
Patch280: rpm-4.13.x-writable-tmp-dir.patch
# Parallel debuginfo processing
Patch281: find-debuginfo-split-traversal-and-extraction.patch
Patch282: find-debuginfo-process-files-in-parallel.patch
# OpenSSL backend
Patch300: 0001-Add-OpenSSL-support-for-digest-and-signatures.patch
@ -594,6 +598,9 @@ exit 0
%doc doc/librpm/html/*
%changelog
* Fri Jun 23 2017 Mark Wielaard <mjw@fedoraproject.org> - 4.13.0.1-24
- Backport parallel debuginfo processing.
* Tue May 30 2017 Mark Wielaard <mjw@fedoraproject.org> - 4.13.0.1-23
- Fix resetting attr flags in buildid creation (#1449732)