Add presets for Haswell and Ivy Bridge.
This commit is contained in:
parent
c09fda2a0f
commit
32346915e6
158
papi-intel.patch
Normal file
158
papi-intel.patch
Normal file
@ -0,0 +1,158 @@
|
||||
commit 4c0349c04d1ede3776a25ad1444a2c07d99bef6e
|
||||
Author: James Ralph <ralph@icl.utk.edu>
|
||||
Date: Mon Aug 26 10:23:52 2013 -0400
|
||||
|
||||
papi_events.csv: First draft preset events on HSW
|
||||
|
||||
Contributed by Nils Smeds
|
||||
-------------------------
|
||||
Here is a suggestion for addition to Hsw counters. These are not
|
||||
rigorously tested. It compiles and loads.
|
||||
I'm rather uncertain on many of the events so I am hoping that adding
|
||||
events like this will get some useful
|
||||
feedback from the community so that we can improve.
|
||||
-------------------------
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2e0da80..39ec16c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -606,6 +606,63 @@ PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
||||
PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:DEMAND_LD_MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
|
||||
#PRESET,PAPI_HW_INT,NOT_DERIVED,HW_INTERRUPTS
|
||||
#
|
||||
+
|
||||
+# Intel Haswell events (and most likely also Sandy Bridge)
|
||||
+CPU,hsw
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:REF_XCLK
|
||||
+# Loads and stores
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_LOADS
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOPS_RETIRED:ALL_STORES
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_STORES
|
||||
+# L1 cache
|
||||
+PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
|
||||
+PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
|
||||
+PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
|
||||
+# L2 cache
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2_RQSTS:REFERENCES
|
||||
+PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
|
||||
+PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
|
||||
+# L3 cache
|
||||
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LONGEST_LAT_CACHE:REFERENCE
|
||||
+PRESET,PAPI_L3_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
|
||||
+PRESET,PAPI_L3_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_MISS
|
||||
+# SMP
|
||||
+PRESET,PAPI_CA_SNP,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_ANY
|
||||
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
|
||||
+PRESET,PAPI_CA_CLN,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_RFO
|
||||
+PRESET,PAPI_CA_INV,NOT_DERIVED,OFFCORE_RESPONSE_0:HITM
|
||||
+PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_FWD
|
||||
+# TLB
|
||||
+PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK,DTLB_STORE_MISSES:MISS_CAUSES_A_WALK
|
||||
+PRESET,PAPI_TLB_IM,NOT_DERIVED,ITLB_MISSES:MISS_CAUSES_A_WALK
|
||||
+# Prefetcher
|
||||
+PRESET,PAPI_PRF_DM,NOT_DERIVED,L2_RQSTS:L2_PF_MISS
|
||||
+# Stalls
|
||||
+PRESET,PAPI_MEM_WCY,NOT_DERIVED,RESOURCE_STALLS:SB
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,RESOURCE_STALLS:ANY
|
||||
+PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ:EMPTY
|
||||
+PRESET,PAPI_STL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=1:i=1
|
||||
+PRESET,PAPI_FUL_ICY,DERIVED_ADD,IDQ:ALL_DSB_CYCLES_4_UOPS,IDQ:ALL_MITE_CYCLES_4_UOPS
|
||||
+PRESET,PAPI_FUL_CCY,NOT_DERIVED,UOPS_RETIRED:ALL:c=4
|
||||
+# Branches
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
|
||||
+PRESET,PAPI_BR_TKN,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_INST_RETIRED:NOT_TAKEN
|
||||
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:NOT_TAKEN
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:CONDITIONAL
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:CONDITIONAL,BR_MISP_RETIRED:CONDITIONAL
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
+# End of hsw list
|
||||
+#
|
||||
CPU,Intel Core2
|
||||
CPU,Intel Core
|
||||
CPU,core
|
||||
commit f20568575d3d8023f4f97d3d968a606a51a1e01f
|
||||
Author: James Ralph <ralph@icl.utk.edu>
|
||||
Date: Tue Sep 17 09:06:50 2013 -0400
|
||||
|
||||
papi_events.csv: Add PAPI_L1_ICM for Haswell
|
||||
|
||||
Thanks to Maurice Marks of Unisys for the contribution
|
||||
-------------
|
||||
I've continued testing on Haswell. By comparison with Vtune and Emon on
|
||||
Haswell I found that we can use
|
||||
the counter L2_RQSTS:ALL_CODE_RD for PAPI_L1_ICM, which is a very useful
|
||||
measure.
|
||||
|
||||
Attached is my current version of papi_events.csv with Haswell fixes.
|
||||
-------------
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 39ec16c..01821a8 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -620,6 +620,7 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_UOPS_RETIRED:ALL_LOADS,MEM_UOPS_RETIRED:ALL_
|
||||
PRESET,PAPI_L1_TCA,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT:L1_MISS
|
||||
PRESET,PAPI_L1_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_HIT
|
||||
PRESET,PAPI_L1_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L1_MISS
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
# L2 cache
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
|
||||
commit b2d643df6a20a85e24a2f797c6bea164ed099a84
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Tue Nov 5 16:09:11 2013 -0500
|
||||
|
||||
Add floating point events for IvyBridge
|
||||
|
||||
Now that Intel has documented them and libpfm4 supports them, PAPI
|
||||
can use them. We just use the same events as on sandybridge.
|
||||
|
||||
Tested on an ivybridge system.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 01821a8..42c1da0 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -576,6 +576,15 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,ILD_STALL:IQ_FULL
|
||||
PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_LOADS
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_UOP_RETIRED:ANY_STORES
|
||||
#
|
||||
+# Counts scalars only; no SSE or AVX is counted; includes speculative
|
||||
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
|
||||
+#
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
|
||||
+#
|
||||
# Intel SandyBridge only
|
||||
CPU,snb
|
||||
CPU,snb_ep
|
||||
@@ -586,15 +595,6 @@ PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:CONDITIONAL
|
||||
PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:CONDITIONAL
|
||||
PRESET,PAPI_TLB_DM,DERIVED_ADD,DTLB_LOAD_MISSES:CAUSES_A_WALK,DTLB_STORE_MISSES:CAUSES_A_WALK
|
||||
#
|
||||
-# Counts scalars only; no SSE or AVX is counted; includes speculative
|
||||
-PRESET,PAPI_FP_INS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
|
||||
-PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE
|
||||
-#
|
||||
-PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|N2|8|*|+|+|,FP_COMP_OPS_EXE:SSE_FP_SCALAR_SINGLE,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
|
||||
-PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|N2|4|*|+|+|,FP_COMP_OPS_EXE:SSE_SCALAR_DOUBLE,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
|
||||
-PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|4|*|N1|8|*|+|,FP_COMP_OPS_EXE:SSE_PACKED_SINGLE,SIMD_FP_256:PACKED_SINGLE
|
||||
-PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|2|*|N1|4|*|+|,FP_COMP_OPS_EXE:SSE_FP_PACKED_DOUBLE,SIMD_FP_256:PACKED_DOUBLE
|
||||
-#
|
||||
# Intel IvyBridge only
|
||||
CPU,ivb
|
||||
CPU,ivb_ep
|
@ -2,12 +2,13 @@
|
||||
Summary: Performance Application Programming Interface
|
||||
Name: papi
|
||||
Version: 5.2.0
|
||||
Release: 2%{?dist}
|
||||
Release: 4%{?dist}
|
||||
License: BSD
|
||||
Group: Development/System
|
||||
URL: http://icl.cs.utk.edu/papi/
|
||||
Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}.tar.gz
|
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
|
||||
Patch100: papi-intel.patch
|
||||
BuildRequires: autoconf
|
||||
BuildRequires: doxygen
|
||||
BuildRequires: ncurses-devel
|
||||
@ -57,6 +58,7 @@ the PAPI user-space libraries and interfaces.
|
||||
|
||||
%prep
|
||||
%setup -q
|
||||
%patch100 -p1
|
||||
|
||||
%build
|
||||
%if %{without bundled_libpfm}
|
||||
@ -138,6 +140,9 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%{_libdir}/*.a
|
||||
|
||||
%changelog
|
||||
* Mon Jan 13 2014 William Cohen <wcohen@redhat.com> - 5.2.0-4
|
||||
- Add presets for Haswell and Ivy Bridge.
|
||||
|
||||
* Wed Aug 14 2013 William Cohen <wcohen@redhat.com> - 5.2.0-2
|
||||
- Enable infiniband and stealtime components.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user