Add presets for Haswell and Ivy Bridge.

This commit is contained in:
William Cohen 2014-01-13 13:34:38 -05:00
parent c09fda2a0f
commit 32346915e6
2 changed files with 164 additions and 1 deletions

158
papi-intel.patch Normal file
View File

@ -0,0 +1,158 @@
commit 4c0349c04d1ede3776a25ad1444a2c07d99bef6e
Author: James Ralph <ralph@icl.utk.edu>
Date: Mon Aug 26 10:23:52 2013 -0400
papi_events.csv: First draft preset events on HSW
Contributed by Nils Smeds
-------------------------
Here is a suggestion for addition to Hsw counters. These are not
rigorously tested. It compiles and loads.
I'm rather uncertain on many of the events so I am hoping that adding
events like this will get some useful
feedback from the community so that we can improve.
-------------------------
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 2e0da80..39ec16c 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -606,6 +606,63 @@ PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
#PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS
#
+
+# Intel Haswell events (and most likely also Sandy Bridge)
+CPU,hsw
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK
+# Loads and stores
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
+# L1 cache
+PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
+PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
+PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
+# L2 cache
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
+PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
+PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
+# L3 cache
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
+PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
+PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
+# SMP
+PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
+PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO
+PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM
+PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD
+# TLB
+PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK
+PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK
+# Prefetcher
+PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS
+# Stalls
+PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB
+PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY
+PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY
+PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1
+PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS
+PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4
+# Branches
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
+PRESET,PAPI_BR_TKN,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_INST_RETIRED:NOT_TAKEN
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
+# End of hsw list
+#
CPU,Intel Core2
CPU,Intel Core
CPU,core
commit f20568575d3d8023f4f97d3d968a606a51a1e01f
Author: James Ralph <ralph@icl.utk.edu>
Date: Tue Sep 17 09:06:50 2013 -0400
papi_events.csv: Add PAPI_L1_ICM for Haswell
Thanks to Maurice Marks of Unisys for the contribution
-------------
I've continued testing on Haswell. By comparison with Vtune and Emon on
Haswell I found that we can use
the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which is a very useful
measure.
Attached is my current version of papi_events.csv with Haswell fixes.
-------------
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 39ec16c..01821a8 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -620,6 +620,7 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_
PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
# L2 cache
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
commit b2d643df6a20a85e24a2f797c6bea164ed099a84
Author: Vince Weaver <vincent.weaver@maine.edu>
Date: Tue Nov 5 16:09:11 2013 -0500
Add floating point events for IvyBridge
Now that Intel has documented them and libpfm4 supports them, PAPI
can use them. We just use the same events as on sandybridge.
Tested on an ivybridge system.
diff --git a/src/papi_events.csv b/src/papi_events.csv
index 01821a8..42c1da0 100644
--- a/src/papi_events.csv
+++ b/src/papi_events.csv
@@ -576,6 +576,15 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,ILD_STALL:IQ_FULL
PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
#
+# Counts scalars only; no SSE or AVX is counted; includes speculative
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
+#
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
+#
# Intel SandyBridge only
CPU,snb
CPU,snb_ep
@@ -586,15 +595,6 @@ PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
#
-# Counts scalars only; no SSE or AVX is counted; includes speculative
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
-#
-PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
-PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
-PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
-PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
-#
# Intel IvyBridge only
CPU,ivb
CPU,ivb_ep

View File

@ -2,12 +2,13 @@
Summary: Performance Application Programming Interface
Name: papi
Version: 5.2.0
Release: 2%{?dist}
Release: 4%{?dist}
License: BSD
Group: Development/System
URL: http://icl.cs.utk.edu/papi/
Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
Patch100: papi-intel.patch
BuildRequires: autoconf
BuildRequires: doxygen
BuildRequires: ncurses-devel
@ -57,6 +58,7 @@ the PAPI user-space libraries and interfaces.
%prep
%setup -q
%patch100 -p1
%build
%if %{without bundled_libpfm}
@ -138,6 +140,9 @@ rm -rf $RPM_BUILD_ROOT
%{_libdir}/*.a
%changelog
* Mon Jan 13 2014 William Cohen <wcohen@redhat.com> - 5.2.0-4
- Add presets for Haswell and Ivy Bridge.
* Wed Aug 14 2013 William Cohen <wcohen@redhat.com> - 5.2.0-2
- Enable infiniband and stealtime components.