From 1833f9dae371a48e3f52891262ad2d5fd75fc205 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?Dan=20Hor=C3=A1k?= Date: Fri, 5 Jun 2009 14:12:52 +0200 Subject: [PATCH] s390-tools-1.8.1-ziomon-fixes --- ziomon/stats.h | 2 +- ziomon/ziomon | 84 ++++++++++++++++++++++++++++++++++++++++++++----- ziomon/ziomon_util.c | 2 +- 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/ziomon/stats.h b/ziomon/stats.h index a28d436..0920b27 100644 --- a/ziomon/stats.h +++ b/ziomon/stats.h @@ -142,7 +142,7 @@ static inline void histlog2_print(const char *s, const __u32 a[], int i; printf("%s:\n", s); - for (i = 0; i < h->num; i++) { + for (i = 0; i < h->num - 1; i++) { printf(" %10ld:%6d", (unsigned long)(histlog2_upper_limit(i, h)), a[i]); if (!((i + 1) % 4)) diff --git a/ziomon/ziomon b/ziomon/ziomon index aa1cf78..fe4d8ec 100755 --- a/ziomon/ziomon +++ b/ziomon/ziomon @@ -32,7 +32,7 @@ WRP_DEVICES=(); WRP_LUNS=(); WRP_LOGFILE=""; # limit of actual data in percent that need space on disk -WRP_SIZE_THRESHOLD="25"; +WRP_SIZE_THRESHOLD="10"; WRP_FORCE=0; function debug() { @@ -234,6 +234,7 @@ function start_trace() { local hosts_param; local luns_param; local i; + local len; if [ $WRP_DEBUG -ne 0 ]; then verbose="-V"; @@ -276,7 +277,7 @@ function start_trace() { blkiomon_command="blkiomon --interval=$WRP_INTERVAL -Q $WRP_MSG_Q_PATH -q $WRP_MSG_Q_ID -m $WRP_MSG_Q_BLKIOMON_ID $verbose_blk -d -"; zfcpdd_command="ziomon_zfcpdd -Q $WRP_MSG_Q_PATH -q $WRP_MSG_Q_ID -m $WRP_MSG_Q_ZIOMON_ZFCPDD_ID -i $WRP_INTERVAL"; debug "starting blktrace: $blktrace_command | $blkiomon_command | $zfcpdd_command"; - $blktrace_command | $blkiomon_command | $zfcpdd_command > $WRP_MSG_Q_PATH/blktrace.log & + $blktrace_command 2>$WRP_MSG_Q_PATH/blktrace.err | $blkiomon_command | $zfcpdd_command > $WRP_MSG_Q_PATH/blktrace.log & i=0; # might take a moment to start all processes in the pipe if system under load while [ $i -lt 60 ]; do @@ -303,7 +304,17 @@ function start_trace() { echo "done"; echo -n 
"Collecting data..."; - sleep $WRP_DURATION; + # pay extra attention to blktrace + for (( i=0; i<$WRP_DURATION; ++i )); do + len=`cat $WRP_MSG_Q_PATH/blktrace.err | wc -l`; + if [ $len -ne 0 ]; then + cat $WRP_MSG_Q_PATH/blktrace.err; + echo "Error: blktrace has errors, aborting"; + return; + fi + sleep 1; + done + echo "done"; } @@ -358,6 +369,58 @@ function emergency_shutdown() { } +function check_cpuplugd { + # check if cpuplugd is running + # If so, the whole per-cpu mechanism of blktrace gets corrupted, which + # results in the infamous 'bad trace magic' message + if [ -e /var/run/cpuplugd.pid ]; then + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"; + echo "ziomon: Warning: cpuplugd is running which can corrupt the traces."; + echo " It is recommended to stop cpuplugd for the duration of the"; + echo " trace using 'service cpuplugd stop'."; + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"; + fi +} + + +# we need 2MB per device and CPU +function check_vmalloc_space() { + local total; + local used; + local free; + local num_cpus; + local required; + local result; + + num_cpus=`cat /proc/cpuinfo | grep processors | awk '{print $4}'`; + total=`cat /proc/meminfo | grep VmallocTotal | awk '{print $2}'`; + used=`cat /proc/meminfo | grep VmallocUsed | awk '{print $2}'`; + + (( free=$total-$used )); + (( required=$num_cpus*${#WRP_DEVICES[@]}*2048 )); + (( result=$free-$required )); + debug "Required Vmalloc space: $required KBytes"; + if [ $result -lt 0 ]; then + echo "$WRP_TOOLNAME: Not enough free Vmalloc space:"; + echo " Required: $required KBytes"; + echo " Free: $free KBytes"; + exit 1; + fi + + return 0; +} + + +function check_blkiomon() { + # check blkiomon version + ver=`blkiomon -V | awk '{print $3}'`; + if [ "$ver" != "0.2" ]; then + echo "$WRP_TOOLNAME: Unsupported blkiomon version $ver detected, aborting"; + exit 1; + fi +} + + function setup() { while [ -e $WRP_MSG_Q_PATH ]; do 
WRP_MSG_Q_PATH="$WRP_MSG_Q_PATH$RANDOM"; @@ -476,7 +539,7 @@ function determine_host_adapters() { local num_s_devs; local s_dev_ratio; - echo -n "check devices..."; + echo -n "Check devices..."; # Estimate fraction of /dev/s* devices - if >50%, start with check for regular devices num_s_devs=`echo ${WRP_DEVICES[@]} | sed "s/ /\n/g" | grep /dev/s | wc -l`; @@ -599,7 +662,6 @@ function check_size_requirements() { local estimated_size; local free_space; local logpath=`dirname $WRP_LOGFILE`; - local num_uniq_devs; set `ziomon_mgr -e`; util_base_sz=$1; @@ -611,12 +673,10 @@ function check_size_requirements() { # NOTE: Since blktrace and ziomon_zfcpdd write messages only when there is # traffic, the estimate is an upper boundary only - num_uniq_devs=`echo ${WRP_LUNS[@]} | sed 's/ /\n/g' | cut -d : -f 4 | sort | uniq | wc -l`; - debug "number of unique devices: $num_uniq_devs"; debug "disk space requirements:"; (( size_per_record = $util_base_sz + ${#WRP_HOST_ADAPTERS[@]} * $util_variable_sz + $ioerr_base_sz - + $num_uniq_devs * ( $ioerr_variable_sz + $blkiotrace_sz + $zfcpiotrace_sz ) - + ( 2 + $num_uniq_devs) * 8 )); + + ${#WRP_DEVICES[@]} * ( $ioerr_variable_sz + $blkiotrace_sz + $zfcpiotrace_sz ) + + ( 2 + ${#WRP_DEVICES[@]}) * 8 )); debug " size per interval: $size_per_record Bytes"; (( total_num_records = $WRP_DURATION / $WRP_INTERVAL )); debug " total number of intervals: $total_num_records"; @@ -653,10 +713,16 @@ setup; parse_params $@; +check_cpuplugd; + +check_blkiomon; + check_for_existing_output; determine_host_adapters; +check_vmalloc_space; + check_size_requirements; [ $? 
-eq 0 ] && start_trace; diff --git a/ziomon/ziomon_util.c b/ziomon/ziomon_util.c index e3e0762..043d3d1 100644 --- a/ziomon/ziomon_util.c +++ b/ziomon/ziomon_util.c @@ -597,7 +597,7 @@ static int poll_ioerr_cnt(int init, struct ioerr_data *data, for (i=0; i<opts->num_luns; ++i) { /* read ioerr_cnt attribute */ if (read_attribute(opts->luns[i], line, NULL)) { - fprintf(stderr, "%s: Warning: Could read %s\n", + fprintf(stderr, "%s: Warning: Could not read %s\n", toolname, opts->luns[i]); grc++; continue; -- 1.6.0.6