diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 38993e09ef03..deb9eba3a43d 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -139,7 +139,7 @@ static void ftrace_mod_code(void) clear_mod_flag(); } -void ftrace_nmi_enter(void) +void arch_ftrace_nmi_enter(void) { if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { smp_rmb(); @@ -150,7 +150,7 @@ void ftrace_nmi_enter(void) smp_mb(); } -void ftrace_nmi_exit(void) +void arch_ftrace_nmi_exit(void) { /* Finish all executions before clearing nmi_running */ smp_mb(); diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index dca7bf8cffe2..4ec2c9b205f2 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -3,11 +3,34 @@ #ifdef CONFIG_FTRACE_NMI_ENTER -extern void ftrace_nmi_enter(void); -extern void ftrace_nmi_exit(void); +extern void arch_ftrace_nmi_enter(void); +extern void arch_ftrace_nmi_exit(void); #else -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } +static inline void arch_ftrace_nmi_enter(void) { } +static inline void arch_ftrace_nmi_exit(void) { } #endif +#ifdef CONFIG_HWLAT_TRACER +extern bool trace_hwlat_callback_enabled; +extern void trace_hwlat_callback(bool enter); +#endif + +static inline void ftrace_nmi_enter(void) +{ +#ifdef CONFIG_HWLAT_TRACER + if (trace_hwlat_callback_enabled) + trace_hwlat_callback(true); +#endif + arch_ftrace_nmi_enter(); +} + +static inline void ftrace_nmi_exit(void) +{ + arch_ftrace_nmi_exit(); +#ifdef CONFIG_HWLAT_TRACER + if (trace_hwlat_callback_enabled) + trace_hwlat_callback(false); +#endif +} + #endif /* _LINUX_FTRACE_IRQ_H */ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 70d47dd99359..d1cc37e78f99 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -330,18 +330,22 @@ FTRACE_ENTRY(hwlat, hwlat_entry, F_STRUCT( __field( u64, duration ) __field( u64, outer_duration ) + __field( u64, nmi_total_ts ) __field_struct( struct timespec, timestamp ) __field_desc( long, timestamp, tv_sec ) __field_desc( long, timestamp, tv_nsec ) + __field( unsigned int, nmi_count ) __field( unsigned int, seqnum ) ), - F_printk("cnt:%u\tts:%010lu.%010lu\tinner:%llu\touter:%llu\n", + F_printk("cnt:%u\tts:%010lu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n", __entry->seqnum, __entry->tv_sec, __entry->tv_nsec, __entry->duration, - __entry->outer_duration), + __entry->outer_duration, + __entry->nmi_total_ts, + __entry->nmi_count), FILTER_OTHER ); diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 65aab3914a56..b97286c48735 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -64,6 +64,15 @@ static struct dentry *hwlat_sample_window; /* sample window us */ /* Save the previous tracing_thresh value */ static unsigned long save_tracing_thresh; +/* NMI timestamp counters */ +static u64 nmi_ts_start; +static u64 nmi_total_ts; +static int nmi_count; +static int nmi_cpu; + +/* Tells NMIs to call back to the hwlat tracer to record timestamps */ +bool trace_hwlat_callback_enabled; + /* If the user changed threshold, remember it */ static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC; @@ -72,7 +81,9 @@ struct hwlat_sample { u64 seqnum; /* unique sequence */ u64 duration; /* delta */ u64 outer_duration; /* delta (outer loop) */ + u64 nmi_total_ts; /* Total time spent in NMIs */ struct timespec timestamp; /* wall time */ + int nmi_count; /* # NMIs during this sample */ }; /* keep the global state somewhere. */ @@ -112,6 +123,8 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) entry->duration = sample->duration; entry->outer_duration = sample->outer_duration; entry->timestamp = sample->timestamp; + entry->nmi_total_ts = sample->nmi_total_ts; + entry->nmi_count = sample->nmi_count; if (!call_filter_check_discard(call, entry, buffer, event)) __buffer_unlock_commit(buffer, event); @@ -125,6 +138,26 @@ static void trace_hwlat_sample(struct hwlat_sample *sample) #define init_time(a, b) (a = b) #define time_u64(a) a +void trace_hwlat_callback(bool enter) +{ + if (smp_processor_id() != nmi_cpu) + return; + + /* + * Currently trace_clock_local() calls sched_clock() and the + * generic version is not NMI safe. + */ + if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { + if (enter) + nmi_ts_start = time_get(); + else + nmi_total_ts = time_get() - nmi_ts_start; + } + + if (enter) + nmi_count++; +} + /** * get_sample - sample the CPU TSC and look for likely hardware latencies * @@ -144,6 +177,14 @@ static int get_sample(void) do_div(thresh, NSEC_PER_USEC); /* modifies interval value */ + nmi_cpu = smp_processor_id(); + nmi_total_ts = 0; + nmi_count = 0; + /* Make sure NMIs see this first */ + barrier(); + + trace_hwlat_callback_enabled = true; + init_time(last_t2, 0); start = time_get(); /* start timestamp */ @@ -188,6 +229,10 @@ static int get_sample(void) } while (total <= hwlat_data.sample_width); + barrier(); /* finish the above in the view for NMIs */ + trace_hwlat_callback_enabled = false; + barrier(); /* Make sure nmi_total_ts is no longer updated */ + ret = 0; /* If we exceed the threshold value, we have found a hardware latency */ @@ -196,11 +241,17 @@ static int get_sample(void) ret = 1; + /* We read in microseconds */ + if (nmi_total_ts) + do_div(nmi_total_ts, NSEC_PER_USEC); + hwlat_data.count++; s.seqnum = hwlat_data.count; s.duration = sample; s.outer_duration = outer_sample; s.timestamp = CURRENT_TIME; + s.nmi_total_ts = nmi_total_ts; + s.nmi_count = nmi_count; trace_hwlat_sample(&s); /* Keep a running maximum ever recorded hardware latency */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d67a562df259..3fc20422c166 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1109,13 +1109,27 @@ trace_hwlat_print(struct trace_iterator *iter, int flags, trace_assign_type(field, entry); - trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld\n", + trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld", field->seqnum, field->duration, field->outer_duration, field->timestamp.tv_sec, field->timestamp.tv_nsec); + if (field->nmi_count) { + /* + * The generic sched_clock() is not NMI safe, thus + * we only record the count and not the time. + */ + if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) + trace_seq_printf(s, " nmi-total:%llu", + field->nmi_total_ts); + trace_seq_printf(s, " nmi-count:%u", + field->nmi_count); + } + + trace_seq_putc(s, '\n'); + return trace_handle_return(s); }