perf/x86/intel: Add Goldmont Plus CPU PMU support
Add perf core PMU support for Intel Goldmont Plus CPU cores:

 - The init code is based on Goldmont.
 - There is a new cache event list, based on the Goldmont cache event list.
 - All four general-purpose performance counters support PEBS.
 - The first general-purpose performance counter implements the reduced
   skid PEBS mechanism; use the :ppp modifier to request reduced skid
   PEBS for an event.
 - Goldmont Plus has a 4-wide pipeline for Topdown.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Link: http://lkml.kernel.org/r/20170712134423.17766-1-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
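For reference, the :ppp event modifier corresponds to perf_event_attr.precise_ip == 3, which the new glp_get_event_constraints() in the diff below routes to general-purpose counter 0. A minimal user-space sketch of requesting reduced-skid PEBS (illustrative only, not part of this patch; the helper name is hypothetical):

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Open an instructions sampling event with maximum precision (:ppp). */
	static int open_reduced_skid_event(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size          = sizeof(attr);
		attr.type          = PERF_TYPE_HARDWARE;
		attr.config        = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = 100000;
		attr.precise_ip    = 3;	/* reduced skid; PMC0 only on Goldmont Plus */

		/* Profile the calling thread on any CPU. */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}

The same request can be made from the perf tool with e.g. 'perf record -e instructions:ppp'.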
@@ -1708,6 +1708,120 @@ static __initconst const u64 glm_hw_cache_extra_regs
 	},
 };
 
+static __initconst const u64 glp_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
+			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
+			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+};
+
+static __initconst const u64 glp_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+};
+
 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
@@ -3016,6 +3130,9 @@ static int hsw_hw_config(struct perf_event *event)
 	return 0;
 }
 
+static struct event_constraint counter0_constraint =
+			INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
+
 static struct event_constraint counter2_constraint =
 			EVENT_CONSTRAINT(0, 0x4, 0);
 
@@ -3037,6 +3154,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	return c;
 }
 
+static struct event_constraint *
+glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	/* :ppp means to do reduced skid PEBS which is PMC0 only. */
+	if (event->attr.precise_ip == 3)
+		return &counter0_constraint;
+
+	c = intel_get_event_constraints(cpuc, idx, event);
+
+	return c;
+}
+
 /*
  * Broadwell:
  *
@@ -3838,6 +3970,32 @@ __init int intel_pmu_init(void)
 		pr_cont("Goldmont events, ");
 		break;
 
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_glm_extra_regs;
+		/*
+		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+		 * for precise cycles.
+		 */
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.get_event_constraints = glp_get_event_constraints;
+		x86_pmu.cpu_events = glm_events_attrs;
+		/* Goldmont Plus has 4-wide pipeline */
+		event_attr_td_total_slots_scale_glm.event_str = "4";
+		pr_cont("Goldmont plus events, ");
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
@@ -651,6 +651,12 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_glp_pebs_event_constraints[] = {
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),	/* MEM_INST_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),	/* MEM_UNCORE_RETIRED.* */
@@ -879,6 +879,8 @@ extern struct event_constraint intel_slm_pebs_event_constraints[];
 
 extern struct event_constraint intel_glm_pebs_event_constraints[];
 
+extern struct event_constraint intel_glp_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];