summaryrefslogtreecommitdiff
path: root/lib/libpmc
diff options
context:
space:
mode:
authorFabien Thomas <fabient@FreeBSD.org>2012-10-16 13:27:20 +0000
committerFabien Thomas <fabient@FreeBSD.org>2012-10-16 13:27:20 +0000
commita946fc35a665fc8e3b05f9e5f139bcb290ba642b (patch)
tree2d11103e618df8430b60b0e12c310a1ea92b5bb4 /lib/libpmc
parent20816506ac518803a4e4f10b18f7ad37bfc9e327 (diff)
Notes
Diffstat (limited to 'lib/libpmc')
-rw-r--r--lib/libpmc/Makefile1
-rw-r--r--lib/libpmc/libpmc.c75
-rw-r--r--lib/libpmc/pmc.ivybridge.3880
-rw-r--r--lib/libpmc/pmc.sandybridge.3662
4 files changed, 1291 insertions, 327 deletions
diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile
index 9e6a0b1b6f79..33234c49e518 100644
--- a/lib/libpmc/Makefile
+++ b/lib/libpmc/Makefile
@@ -28,6 +28,7 @@ MAN+= pmc.atom.3
MAN+= pmc.core.3
MAN+= pmc.core2.3
MAN+= pmc.iaf.3
+MAN+= pmc.ivybridge.3
MAN+= pmc.ucf.3
MAN+= pmc.k7.3
MAN+= pmc.k8.3
diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c
index 5024c3b1aa13..eda2c8c7a169 100644
--- a/lib/libpmc/libpmc.c
+++ b/lib/libpmc/libpmc.c
@@ -182,6 +182,11 @@ static const struct pmc_event_descr corei7_event_table[] =
__PMC_EV_ALIAS_COREI7()
};
+static const struct pmc_event_descr ivybridge_event_table[] =
+{
+ __PMC_EV_ALIAS_IVYBRIDGE()
+};
+
static const struct pmc_event_descr sandybridge_event_table[] =
{
__PMC_EV_ALIAS_SANDYBRIDGE()
@@ -221,6 +226,7 @@ PMC_MDEP_TABLE(atom, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
PMC_MDEP_TABLE(core, IAP, PMC_CLASS_SOFT, PMC_CLASS_TSC);
PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
+PMC_MDEP_TABLE(ivybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
PMC_MDEP_TABLE(sandybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
PMC_MDEP_TABLE(westmere, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
PMC_MDEP_TABLE(k7, K7, PMC_CLASS_SOFT, PMC_CLASS_TSC);
@@ -257,6 +263,7 @@ PMC_CLASS_TABLE_DESC(atom, IAP, atom, iap);
PMC_CLASS_TABLE_DESC(core, IAP, core, iap);
PMC_CLASS_TABLE_DESC(core2, IAP, core2, iap);
PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap);
+PMC_CLASS_TABLE_DESC(ivybridge, IAP, ivybridge, iap);
PMC_CLASS_TABLE_DESC(sandybridge, IAP, sandybridge, iap);
PMC_CLASS_TABLE_DESC(westmere, IAP, westmere, iap);
PMC_CLASS_TABLE_DESC(ucf, UCF, ucf, ucf);
@@ -362,14 +369,14 @@ static struct pmc_op_getdyneventinfo soft_event_info;
/* Event masks for events */
struct pmc_masks {
const char *pm_name;
- const uint32_t pm_value;
+ const uint64_t pm_value;
};
#define PMCMASK(N,V) { .pm_name = #N, .pm_value = (V) }
#define NULLMASK { .pm_name = NULL }
#if defined(__amd64__) || defined(__i386__)
static int
-pmc_parse_mask(const struct pmc_masks *pmask, char *p, uint32_t *evmask)
+pmc_parse_mask(const struct pmc_masks *pmask, char *p, uint64_t *evmask)
{
const struct pmc_masks *pm;
char *q, *r;
@@ -558,6 +565,8 @@ static struct pmc_event_alias core2_aliases_without_iaf[] = {
#define atom_aliases_without_iaf core2_aliases_without_iaf
#define corei7_aliases core2_aliases
#define corei7_aliases_without_iaf core2_aliases_without_iaf
+#define ivybridge_aliases core2_aliases
+#define ivybridge_aliases_without_iaf core2_aliases_without_iaf
#define sandybridge_aliases core2_aliases
#define sandybridge_aliases_without_iaf core2_aliases_without_iaf
#define westmere_aliases core2_aliases
@@ -660,7 +669,7 @@ static struct pmc_masks iap_transition_mask[] = {
NULLMASK
};
-static struct pmc_masks iap_rsp_mask[] = {
+static struct pmc_masks iap_rsp_mask_i7_wm[] = {
PMCMASK(DMND_DATA_RD, (1 << 0)),
PMCMASK(DMND_RFO, (1 << 1)),
PMCMASK(DMND_IFETCH, (1 << 2)),
@@ -679,12 +688,43 @@ static struct pmc_masks iap_rsp_mask[] = {
NULLMASK
};
+static struct pmc_masks iap_rsp_mask_sb_ib[] = {
+ PMCMASK(REQ_DMND_DATA_RD, (1ULL << 0)),
+ PMCMASK(REQ_DMND_RFO, (1ULL << 1)),
+ PMCMASK(REQ_DMND_IFETCH, (1ULL << 2)),
+ PMCMASK(REQ_WB, (1ULL << 3)),
+ PMCMASK(REQ_PF_DATA_RD, (1ULL << 4)),
+ PMCMASK(REQ_PF_RFO, (1ULL << 5)),
+ PMCMASK(REQ_PF_IFETCH, (1ULL << 6)),
+ PMCMASK(REQ_PF_LLC_DATA_RD, (1ULL << 7)),
+ PMCMASK(REQ_PF_LLC_RFO, (1ULL << 8)),
+ PMCMASK(REQ_PF_LLC_IFETCH, (1ULL << 9)),
+ PMCMASK(REQ_BUS_LOCKS, (1ULL << 10)),
+ PMCMASK(REQ_STRM_ST, (1ULL << 11)),
+ PMCMASK(REQ_OTHER, (1ULL << 15)),
+ PMCMASK(RES_ANY, (1ULL << 16)),
+ PMCMASK(RES_SUPPLIER_SUPP, (1ULL << 17)),
+ PMCMASK(RES_SUPPLIER_LLC_HITM, (1ULL << 18)),
+ PMCMASK(RES_SUPPLIER_LLC_HITE, (1ULL << 19)),
+ PMCMASK(RES_SUPPLIER_LLC_HITS, (1ULL << 20)),
+ PMCMASK(RES_SUPPLIER_LLC_HITF, (1ULL << 21)),
+ PMCMASK(RES_SUPPLIER_LOCAL, (1ULL << 22)),
+ PMCMASK(RES_SNOOP_SNPI_NONE, (1ULL << 31)),
+ PMCMASK(RES_SNOOP_SNP_NO_NEEDED,(1ULL << 32)),
+ PMCMASK(RES_SNOOP_SNP_MISS, (1ULL << 33)),
+ PMCMASK(RES_SNOOP_HIT_NO_FWD, (1ULL << 34)),
+ PMCMASK(RES_SNOOP_HIT_FWD, (1ULL << 35)),
+ PMCMASK(RES_SNOOP_HITM, (1ULL << 36)),
+ PMCMASK(RES_NON_DRAM, (1ULL << 37)),
+ NULLMASK
+};
+
static int
iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
struct pmc_op_pmcallocate *pmc_config)
{
char *e, *p, *q;
- uint32_t cachestate, evmask, rsp;
+ uint64_t cachestate, evmask, rsp;
int count, n;
pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
@@ -750,7 +790,13 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7 ||
cpu_info.pm_cputype == PMC_CPU_INTEL_WESTMERE) {
if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
- n = pmc_parse_mask(iap_rsp_mask, p, &rsp);
+ n = pmc_parse_mask(iap_rsp_mask_i7_wm, p, &rsp);
+ } else
+ return (-1);
+ } else if (cpu_info.pm_cputype == PMC_CPU_INTEL_SANDYBRIDGE ||
+ cpu_info.pm_cputype == PMC_CPU_INTEL_IVYBRIDGE) {
+ if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
+ n = pmc_parse_mask(iap_rsp_mask_sb_ib, p, &rsp);
} else
return (-1);
} else
@@ -1069,7 +1115,8 @@ k8_allocate_pmc(enum pmc_event pe, char *ctrspec,
{
char *e, *p, *q;
int n;
- uint32_t count, evmask;
+ uint32_t count;
+ uint64_t evmask;
const struct pmc_masks *pm, *pmask;
pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
@@ -1551,7 +1598,8 @@ p4_allocate_pmc(enum pmc_event pe, char *ctrspec,
char *e, *p, *q;
int count, has_tag, has_busreqtype, n;
- uint32_t evmask, cccractivemask;
+ uint32_t cccractivemask;
+ uint64_t evmask;
const struct pmc_masks *pm, *pmask;
pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
@@ -1979,7 +2027,7 @@ p6_allocate_pmc(enum pmc_event pe, char *ctrspec,
struct pmc_op_pmcallocate *pmc_config)
{
char *e, *p, *q;
- uint32_t evmask;
+ uint64_t evmask;
int count, n;
const struct pmc_masks *pm, *pmask;
@@ -2611,6 +2659,10 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames,
ev = corei7_event_table;
count = PMC_EVENT_TABLE_SIZE(corei7);
break;
+ case PMC_CPU_INTEL_IVYBRIDGE:
+ ev = ivybridge_event_table;
+ count = PMC_EVENT_TABLE_SIZE(ivybridge);
+ break;
case PMC_CPU_INTEL_SANDYBRIDGE:
ev = sandybridge_event_table;
count = PMC_EVENT_TABLE_SIZE(sandybridge);
@@ -2899,6 +2951,9 @@ pmc_init(void)
pmc_class_table[n++] = &corei7uc_class_table_descr;
PMC_MDEP_INIT_INTEL_V2(corei7);
break;
+ case PMC_CPU_INTEL_IVYBRIDGE:
+ PMC_MDEP_INIT_INTEL_V2(ivybridge);
+ break;
case PMC_CPU_INTEL_SANDYBRIDGE:
pmc_class_table[n++] = &ucf_class_table_descr;
pmc_class_table[n++] = &sandybridgeuc_class_table_descr;
@@ -3030,6 +3085,10 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu)
ev = corei7_event_table;
evfence = corei7_event_table + PMC_EVENT_TABLE_SIZE(corei7);
break;
+ case PMC_CPU_INTEL_IVYBRIDGE:
+ ev = ivybridge_event_table;
+ evfence = ivybridge_event_table + PMC_EVENT_TABLE_SIZE(ivybridge);
+ break;
case PMC_CPU_INTEL_SANDYBRIDGE:
ev = sandybridge_event_table;
evfence = sandybridge_event_table + PMC_EVENT_TABLE_SIZE(sandybridge);
diff --git a/lib/libpmc/pmc.ivybridge.3 b/lib/libpmc/pmc.ivybridge.3
new file mode 100644
index 000000000000..bd05eb6ed5bd
--- /dev/null
+++ b/lib/libpmc/pmc.ivybridge.3
@@ -0,0 +1,880 @@
+.\" Copyright (c) 2012 Fabien Thomas. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd August 24, 2012
+.Dt PMC.IVYBRIDGE 3
+.Os
+.Sh NAME
+.Nm pmc.ivybridge
+.Nd measurement events for
+.Tn Intel
+.Tn Ivy Bridge
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Ivy Bridge"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Ivy Bridge PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-043US"
+.%D May 2012
+.%Q "Intel Corporation"
+.Re
+.Ss IVYBRIDGE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+.Ss IVYBRIDGE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li REQ_DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full and partial
+cachelines as well as demand data page table entry cacheline reads. Does not
+count L2 data read prefetches or instruction fetches.
+.It Li REQ_DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership (RFO)
+requests generated by a write to data cacheline. Does not count L2 RFO
+prefetches.
+.It Li REQ_DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline reads.
+Does not count L2 code read prefetches.
+.It Li REQ_WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li REQ_PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li REQ_PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li REQ_PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li REQ_PF_LLC_DATA_RD
+L2 prefetcher to L3 for loads.
+.It Li REQ_PF_LLC_RFO
+RFO requests generated by L2 prefetcher.
+.It Li REQ_PF_LLC_IFETCH
+L2 prefetcher to L3 for instruction fetches.
+.It Li REQ_BUS_LOCKS
+Bus lock and split lock requests.
+.It Li REQ_STRM_ST
+Streaming store requests.
+.It Li REQ_OTHER
+Any other request that crosses IDI, including I/O.
+.It Li RES_ANY
+Catch all value for any response types.
+.It Li RES_SUPPLIER_SUPP
+No Supplier Information available.
+.It Li RES_SUPPLIER_LLC_HITM
+M-state initial lookup stat in L3.
+.It Li RES_SUPPLIER_LLC_HITE
+E-state.
+.It Li RES_SUPPLIER_LLC_HITS
+S-state.
+.It Li RES_SUPPLIER_LLC_HITF
+F-state.
+.It Li RES_SUPPLIER_LOCAL
+Local DRAM Controller.
+.It Li RES_SNOOP_SNPI_NONE
+No details on snoop-related information.
+.It Li RES_SNOOP_SNP_NO_NEEDED
+No snoop was needed to satisfy the request.
+.It Li RES_SNOOP_SNP_MISS
+A snoop was needed and it missed all snooped caches:
+-For LLC Hit, RspIHitI was returned by all cores
+-For LLC Miss, RspI was returned by all sockets and data was returned from
+DRAM.
+.It Li RES_SNOOP_HIT_NO_FWD
+A snoop was needed and it hits in at least one snooped cache. Hit denotes a
+cache-line was valid before snoop effect. This includes:
+-Snoop Hit w/ Invalidation (LLC Hit, RFO)
+-Snoop Hit, Left Shared (LLC Hit/Miss, IFetch/Data_RD)
+-Snoop Hit w/ Invalidation and No Forward (LLC Miss, RFO Hit S)
+In the LLC Miss case, data is returned from DRAM.
+.It Li RES_SNOOP_HIT_FWD
+A snoop was needed and data was forwarded from a remote socket.
+This includes:
+-Snoop Forward Clean, Left Shared (LLC Hit/Miss, IFetch/Data_RD/RFT).
+.It Li RES_SNOOP_HITM
+A snoop was needed and it HitM-ed in local or remote cache. HitM denotes a
+cache-line was in modified state before effect as a result of snoop. This
+includes:
+-Snoop HitM w/ WB (LLC miss, IFetch/Data_RD)
+-Snoop Forward Modified w/ Invalidation (LLC Hit/Miss, RFO)
+-Snoop MtoS (LLC Hit, IFetch/Data_RD).
+.It Li RES_NON_DRAM
+Target was non-DRAM system address. This includes MMIO transactions.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Ivy Bridge programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li LD_BLOCKS.STORE_FORWARD
+.Pq Event 03H , Umask 02H
+Loads blocked by overlapping with store buffer that cannot be forwarded.
+.It Li MISALIGN_MEM_REF.LOADS
+.Pq Event 05H , Umask 01H
+Speculative cache-line split load uops dispatched to L1D.
+.It Li MISALIGN_MEM_REF.STORES
+.Pq Event 05H , Umask 02H
+Speculative cache-line split store-address uops dispatched to L1D.
+.It Li LD_BLOCKS_PARTIAL.ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+False dependencies in MOB due to partial compare on address.
+.It Li DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK
+.Pq Event 08H , Umask 81H
+Misses in all TLB levels that cause a page walk of any page size from demand loads.
+.It Li DTLB_LOAD_MISSES.DEMAND_LD_WALK_COMPLETED
+.Pq Event 08H , Umask 82H
+Misses in all TLB levels that caused page walk completed of any size by demand loads.
+.It Li DTLB_LOAD_MISSES.DEMAND_LD_WALK_DURATION
+.Pq Event 08H , Umask 84H
+Cycles PMH is busy with a walk due to demand loads.
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Increments each cycle the # of Uops issued by the RAT to RS.
+Set Cmask = 1, Inv = 1 to count stalled cycles.
+Set Cmask = 1, Inv = 1, Any = 1 to count stalled cycles of this core.
+.It Li UOPS_ISSUED.FLAGS_MERGE
+.Pq Event 0EH , Umask 10H
+Number of flags-merge uops allocated. Such uops add delay.
+.It Li UOPS_ISSUED.SLOW_LEA
+.Pq Event 0EH , Umask 20H
+Number of slow LEA or similar uops allocated. Such uop has 3 sources (e.g. 2
+sources + immediate) regardless if as a result of LEA instruction or not.
+.It Li UOPS_ISSUED.SINGLE_MUL
+.Pq Event 0EH , Umask 40H
+Number of multiply packed/scalar single precision uops allocated.
+.It Li ARITH.FPU_DIV_ACTIVE
+.Pq Event 14H , Umask 01H
+Cycles that the divider is active, includes INT and FP. Set 'edge =1,
+cmask=1' to count the number of divides.
+.It Li L2_RQSTS.DEMAND_DATA_RD_HIT
+.Pq Event 24H , Umask 01H
+Demand Data Read requests that hit L2 cache.
+.It Li L2_RQSTS.ALL_DEMAND_DATA_RD
+.Pq Event 24H , Umask 03H
+Counts any demand and L1 HW prefetch data load requests to L2.
+.It Li L2_RQSTS.RFO_HITS
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache.
+.It Li L2_RQSTS.ALL_RFO
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests.
+.It Li L2_RQSTS.CODE_RD_HIT
+.Pq Event 24H , Umask 10H
+Number of instruction fetches that hit the L2 cache.
+.It Li L2_RQSTS.CODE_RD_MISS
+.Pq Event 24H , Umask 20H
+Number of instruction fetches that missed the L2 cache.
+.It Li L2_RQSTS.ALL_CODE_RD
+.Pq Event 24H , Umask 30H
+Counts all L2 code requests.
+.It Li L2_RQSTS.PF_HIT
+.Pq Event 24H , Umask 40H
+Counts all L2 HW prefetcher requests that hit L2.
+.It Li L2_RQSTS.PF_MISS
+.Pq Event 24H , Umask 80H
+Counts all L2 HW prefetcher requests that missed L2.
+.It Li L2_RQSTS.ALL_PF
+.Pq Event 24H , Umask C0H
+Counts all L2 HW prefetcher requests.
+.It Li L2_STORE_LOCK_RQSTS.MISS
+.Pq Event 27H , Umask 01H
+RFOs that miss cache lines.
+.It Li L2_STORE_LOCK_RQSTS.HIT_M
+.Pq Event 27H , Umask 08H
+RFOs that hit cache lines in M state.
+.It Li L2_STORE_LOCK_RQSTS.ALL
+.Pq Event 27H , Umask 0FH
+RFOs that access cache lines in any state.
+.It Li L2_L1D_WB_RQSTS.MISS
+.Pq Event 28H , Umask 01H
+Not rejected writebacks that missed LLC.
+.It Li L2_L1D_WB_RQSTS.HIT_E
+.Pq Event 28H , Umask 04H
+Not rejected writebacks from L1D to L2 cache lines in E state.
+.It Li L2_L1D_WB_RQSTS.HIT_M
+.Pq Event 28H , Umask 08H
+Not rejected writebacks from L1D to L2 cache lines in M state.
+.It Li L2_L1D_WB_RQSTS.ALL
+.Pq Event 28H , Umask 0FH
+Not rejected writebacks from L1D to L2 cache lines in any state.
+.It Li LONGEST_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 4FH
+This event counts requests originating from the core that reference a cache
+line in the last level cache.
+.It Li LONGEST_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 41H
+This event counts each cache miss condition for references to the last level
+cache.
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+.It Li CPU_CLK_THREAD_UNHALTED.REF_XCLK
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of XCLK (100 MHz) when not halted.
+.It Li L1D_PEND_MISS.PENDING
+.Pq Event 48H , Umask 01H
+Increments the number of outstanding L1D misses every cycle. Set Cmask = 1
+and Edge =1 to count occurrences.
+Counter 2 only.
+Set Cmask = 1 to count cycles.
+.It Li DTLB_STORE_MISSES.MISS_CAUSES_A_WALK
+.Pq Event 49H , Umask 01H
+Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).
+.It Li DTLB_STORE_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Miss in all TLB levels causes a page walk that completes of any page size
+(4K/2M/4M/1G).
+.It Li DTLB_STORE_MISSES.WALK_DURATION
+.Pq Event 49H , Umask 04H
+Cycles PMH is busy with this walk.
+.It Li DTLB_STORE_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Store operations that miss the first TLB level but hit the second and do not
+cause page walks.
+.It Li LOAD_HIT_PRE.SW_PF
+.Pq Event 4CH , Umask 01H
+Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.
+.It Li LOAD_HIT_PRE.HW_PF
+.Pq Event 4CH , Umask 02H
+Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.
+.It Li L1D.REPLACEMENT
+.Pq Event 51H , Umask 01H
+Counts the number of lines brought into the L1 data cache.
+.It Li MOVE_ELIMINATION.INT_NOT_ELIMINATED
+.Pq Event 58H , Umask 01H
+Number of integer Move Elimination candidate uops that were not eliminated.
+.It Li MOVE_ELIMINATION.SIMD_NOT_ELIMINATED
+.Pq Event 58H , Umask 02H
+Number of SIMD Move Elimination candidate uops that were not eliminated.
+.It Li MOVE_ELIMINATION.INT_ELIMINATED
+.Pq Event 58H , Umask 04H
+Number of integer Move Elimination candidate uops that were eliminated.
+.It Li MOVE_ELIMINATION.SIMD_ELIMINATED
+.Pq Event 58H , Umask 08H
+Number of SIMD Move Elimination candidate uops that were eliminated.
+.It Li CPL_CYCLES.RING0
+.Pq Event 5CH , Umask 01H
+Unhalted core cycles when the thread is in ring 0.
+Use Edge to count transition.
+.It Li CPL_CYCLES.RING123
+.Pq Event 5CH , Umask 02H
+Unhalted core cycles when the thread is not in ring 0.
+.It Li RS_EVENTS.EMPTY_CYCLES
+.Pq Event 5EH , Umask 01H
+Cycles the RS is empty for the thread.
+.It Li TLB_ACCESS.LOAD_STLB_HIT
+.Pq Event 5FH , Umask 01H
+Counts load operations that missed 1st level DTLB but hit the 2nd level.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD
+.Pq Event 60H , Umask 01H
+Offcore outstanding Demand Data Read transactions in SQ to uncore. Set
+Cmask=1 to count cycles.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD
+.Pq Event 60H , Umask 02H
+Offcore outstanding Demand Code Read transactions in SQ to uncore. Set
+Cmask=1 to count cycles.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO
+.Pq Event 60H , Umask 04H
+Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to
+count cycles.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD
+.Pq Event 60H , Umask 08H
+Offcore outstanding cacheable data read transactions in SQ to uncore. Set
+Cmask=1 to count cycles.
+.It Li LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION
+.Pq Event 63H , Umask 01H
+Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.
+.It Li LOCK_CYCLES.CACHE_LOCK_DURATION
+.Pq Event 63H , Umask 02H
+Cycles in which the L1D is locked.
+.It Li IDQ.EMPTY
+.Pq Event 79H , Umask 02H
+Counts cycles the IDQ is empty.
+.It Li IDQ.MITE_UOPS
+.Pq Event 79H , Umask 04H
+Increment each cycle # of uops delivered to IDQ from MITE path.
+Can combine Umask 04H and 20H.
+Set Cmask = 1 to count cycles.
+.It Li IDQ.DSB_UOPS
+.Pq Event 79H , Umask 08H
+Increment each cycle # of uops delivered to IDQ from DSB path.
+Can combine Umask 08H and 10H.
+Set Cmask = 1 to count cycles.
+.It Li IDQ.MS_DSB_UOPS
+.Pq Event 79H , Umask 10H
+Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set
+Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.
+Can combine Umask 04H, 08H.
+.It Li IDQ.MS_MITE_UOPS
+.Pq Event 79H , Umask 20H
+Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set
+Cmask = 1 to count cycles.
+Can combine Umask 04H, 08H.
+.It Li IDQ.MS_UOPS
+.Pq Event 79H , Umask 30H
+Increment each cycle # of uops delivered to IDQ from MS by either DSB or
+MITE. Set Cmask = 1 to count cycles.
+Can combine Umask 04H, 08H.
+.It Li IDQ.ALL_DSB_CYCLES_ANY_UOPS
+.Pq Event 79H , Umask 18H
+Counts cycles DSB is delivered at least one uop. Set Cmask = 1.
+.It Li IDQ.ALL_DSB_CYCLES_4_UOPS
+.Pq Event 79H , Umask 18H
+Counts cycles DSB is delivered four uops. Set Cmask = 4.
+.It Li IDQ.ALL_MITE_CYCLES_ANY_UOPS
+.Pq Event 79H , Umask 24H
+Counts cycles MITE is delivered at least one uop. Set Cmask = 1.
+.It Li IDQ.ALL_MITE_CYCLES_4_UOPS
+.Pq Event 79H , Umask 24H
+Counts cycles MITE is delivered four uops. Set Cmask = 4.
+.It Li IDQ.MITE_ALL_UOPS
+.Pq Event 79H , Umask 3CH
+# of uops delivered to IDQ from any path.
+.It Li ICACHE.MISSES
+.Pq Event 80H , Umask 02H
+Number of Instruction Cache, Streaming Buffer and Victim Cache Misses.
+Includes UC accesses.
+.It Li ITLB_MISSES.MISS_CAUSES_A_WALK
+.Pq Event 85H , Umask 01H
+Misses in all ITLB levels that cause page walks.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Misses in all ITLB levels that cause completed page walks.
+.It Li ITLB_MISSES.WALK_DURATION
+.Pq Event 85H , Umask 04H
+Cycles PMH is busy with a walk.
+.It Li ITLB_MISSES.STLB_HIT
+.Pq Event 85H , Umask 10H
+Number of cache load STLB hits. No page walk.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Stalls caused by changing prefix length of the instruction.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to IQ is full.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Qualify conditional near branch instructions executed, but not necessarily
+retired.
+Must combine with umask 40H, 80H.
+.It Li BR_INST_EXEC.DIRECT_JMP
+.Pq Event 88H , Umask 02H
+Qualify all unconditional near branch instructions excluding calls and
+indirect branches.
+Must combine with umask 80H.
+.It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET
+.Pq Event 88H , Umask 04H
+Qualify executed indirect near branch instructions that are not calls nor
+returns.
+Must combine with umask 80H.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Qualify indirect near branches that have a return mnemonic.
+Must combine with umask 80H.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Qualify unconditional near call branch instructions, excluding non call
+branch, executed.
+Must combine with umask 80H.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Qualify indirect near calls, including both register and memory indirect,
+executed.
+Must combine with umask 80H.
+.It Li BR_INST_EXEC.NONTAKEN
+.Pq Event 88H , Umask 40H
+Qualify non-taken near branches executed.
+Applicable to umask 01H only.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 80H
+Qualify taken near branches executed. Must combine with 01H, 02H, 04H, 08H,
+10H, 20H.
+.It Li BR_INST_EXEC.ALL_BRANCHES
+.Pq Event 88H , Umask FFH
+Counts all near executed branches (not necessarily retired).
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Qualify conditional near branch instructions mispredicted.
+Must combine with umask 40H, 80H.
+.It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET
+.Pq Event 89H , Umask 04H
+Qualify mispredicted indirect near branch instructions that are not calls
+nor returns.
+Must combine with umask 80H.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Qualify mispredicted indirect near branches that have a return mnemonic.
+Must combine with umask 80H.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Qualify mispredicted unconditional near call branch instructions, excluding
+non call branch, executed.
+Must combine with umask 80H.
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Qualify mispredicted indirect near calls, including both register and memory
+indirect, executed.
+Must combine with umask 80H.
+.It Li BR_MISP_EXEC.NONTAKEN
+.Pq Event 89H , Umask 40H
+Qualify mispredicted non-taken near branches executed.
+Applicable to umask 01H only.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 80H
+Qualify mispredicted taken near branches executed. Must combine with
+01H, 02H, 04H, 08H, 10H, 20H.
+.It Li BR_MISP_EXEC.ALL_BRANCHES
+.Pq Event 89H , Umask FFH
+Counts all mispredicted near executed branches (not necessarily retired).
+.It Li IDQ_UOPS_NOT_DELIVERED.CORE
+.Pq Event 9CH , Umask 01H
+Count number of non-delivered uops to RAT per thread.
+Use Cmask to qualify uop b/w.
+.It Li UOPS_DISPATCHED_PORT.PORT_0
+.Pq Event A1H , Umask 01H
+Cycles which a Uop is dispatched on port 0.
+.It Li UOPS_DISPATCHED_PORT.PORT_1
+.Pq Event A1H , Umask 02H
+Cycles which a Uop is dispatched on port 1.
+.It Li UOPS_DISPATCHED_PORT.PORT_2_LD
+.Pq Event A1H , Umask 04H
+Cycles which a load uop is dispatched on port 2.
+.It Li UOPS_DISPATCHED_PORT.PORT_2_STA
+.Pq Event A1H , Umask 08H
+Cycles which a store address uop is dispatched on port 2.
+.It Li UOPS_DISPATCHED_PORT.PORT_2
+.Pq Event A1H , Umask 0CH
+Cycles which a Uop is dispatched on port 2.
+.It Li UOPS_DISPATCHED_PORT.PORT_3_LD
+.Pq Event A1H , Umask 10H
+Cycles which a load uop is dispatched on port 3.
+.It Li UOPS_DISPATCHED_PORT.PORT_3_STA
+.Pq Event A1H , Umask 20H
+Cycles which a store address uop is dispatched on port 3.
+.It Li UOPS_DISPATCHED_PORT.PORT_3
+.Pq Event A1H , Umask 30H
+Cycles which a Uop is dispatched on port 3.
+.It Li UOPS_DISPATCHED_PORT.PORT_4
+.Pq Event A1H , Umask 40H
+Cycles which a Uop is dispatched on port 4.
+.It Li UOPS_DISPATCHED_PORT.PORT_5
+.Pq Event A1H , Umask 80H
+Cycles which a Uop is dispatched on port 5.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Cycles Allocation is stalled due to Resource Related reason.
+.It Li RESOURCE_STALLS.RS
+.Pq Event A2H , Umask 04H
+Cycles stalled due to no eligible RS entry available.
+.It Li RESOURCE_STALLS.SB
+.Pq Event A2H , Umask 08H
+Cycles stalled due to no store buffers available. (not including draining
+from sync).
+.It Li RESOURCE_STALLS.ROB
+.Pq Event A2H , Umask 10H
+Cycles stalled due to re-order buffer full.
+.It Li DSB2MITE_SWITCHES.COUNT
+.Pq Event ABH , Umask 01H
+Number of DSB to MITE switches.
+.It Li DSB2MITE_SWITCHES.PENALTY_CYCLES
+.Pq Event ABH , Umask 02H
+Cycles DSB to MITE switches caused delay.
+.It Li DSB_FILL.EXCEED_DSB_LINES
+.Pq Event ACH , Umask 08H
+DSB Fill encountered > 3 DSB lines.
+.It Li ITLB.ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes, includes 4k/2M/4M pages.
+.It Li OFFCORE_REQUESTS.DEMAND_DATA_RD
+.Pq Event B0H , Umask 01H
+Demand data read requests sent to uncore.
+.It Li OFFCORE_REQUESTS.DEMAND_CODE_RD
+.Pq Event B0H , Umask 02H
+Demand code read requests sent to uncore.
+.It Li OFFCORE_REQUESTS.DEMAND_RFO
+.Pq Event B0H , Umask 04H
+Demand RFO read requests sent to uncore, including regular RFOs, locks,
+ItoM.
+.It Li OFFCORE_REQUESTS.ALL_DATA_RD
+.Pq Event B0H , Umask 08H
+Data read requests sent to uncore (demand and prefetch).
+.It Li UOPS_EXECUTED.THREAD
+.Pq Event B1H , Umask 01H
+Counts total number of uops to be executed per-thread each cycle. Set Cmask
+= 1, INV =1 to count stall cycles.
+.It Li UOPS_EXECUTED.CORE
+.Pq Event B1H , Umask 02H
+Counts total number of uops to be executed per-core each cycle.
+Do not need to set ANY.
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+Off-core Response Performance Monitoring.
+PMC0 only.
+Requires programming MSR 01A6H.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+Off-core Response Performance Monitoring.
+PMC3 only.
+Requires programming MSR 01A7H.
+.It Li TLB_FLUSH.DTLB_THREAD
+.Pq Event BDH , Umask 01H
+DTLB flush attempts of the thread-specific entries.
+.It Li TLB_FLUSH.STLB_ANY
+.Pq Event BDH , Umask 20H
+Count number of STLB flush attempts.
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 00H
+Number of instructions at retirement.
+.It Li INST_RETIRED.ALL
+.Pq Event C0H , Umask 01H
+Precise instruction retired event with HW to reduce effect of PEBS shadow in
+IP distribution.
+PMC1 only.
+Must quiesce other PMCs.
+.It Li OTHER_ASSISTS.AVX_STORE
+.Pq Event C1H , Umask 08H
+Number of assists associated with 256-bit AVX store operations.
+.It Li OTHER_ASSISTS.AVX_TO_SSE
+.Pq Event C1H , Umask 10H
+Number of transitions from AVX-256 to legacy SSE when penalty applicable.
+.It Li OTHER_ASSISTS.SSE_TO_AVX
+.Pq Event C1H , Umask 20H
+Number of transitions from SSE to AVX-256 when penalty applicable.
+.It Li UOPS_RETIRED.ALL
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired. Use cmask=1 and invert to count
+active cycles or stalled cycles.
+Supports PEBS, use Any=1 for core granular.
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle.
+.It Li MACHINE_CLEARS.MEMORY_ORDERING
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Number of self-modifying-code machine clears detected.
+.It Li MACHINE_CLEARS.MASKMOV
+.Pq Event C3H , Umask 20H
+Counts the number of executed AVX masked load operations that refer to an
+illegal address range with the mask bits set to 0.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+Branch instructions at retirement.
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+Supports PEBS.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Direct and indirect near call instructions retired.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_RETURN
+.Pq Event C4H , Umask 08H
+Counts the number of near return instructions retired.
+.It Li BR_INST_RETIRED.NOT_TAKEN
+.Pq Event C4H , Umask 10H
+Counts the number of not taken branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_TAKEN
+.Pq Event C4H , Umask 20H
+Number of near taken branches retired.
+.It Li BR_INST_RETIRED.FAR_BRANCH
+.Pq Event C4H , Umask 40H
+Number of far branches retired.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+Mispredicted branch instructions at retirement.
+.It Li BR_MISP_RETIRED.CONDITIONAL
+.Pq Event C5H , Umask 01H
+Mispredicted conditional branch instructions retired.
+Supports PEBS.
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Direct and indirect mispredicted near call instructions retired.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 04H
+Mispredicted macro branch instructions retired.
+.It Li BR_MISP_RETIRED.NOT_TAKEN
+.Pq Event C5H , Umask 10H
+Mispredicted not taken branch instructions retired.
+.It Li BR_MISP_RETIRED.TAKEN
+.Pq Event C5H , Umask 20H
+Mispredicted taken branch instructions retired.
+.It Li FP_ASSIST.X87_OUTPUT
+.Pq Event CAH , Umask 02H
+Number of X87 FP assists due to Output values.
+.It Li FP_ASSIST.X87_INPUT
+.Pq Event CAH , Umask 04H
+Number of X87 FP assists due to input values.
+.It Li FP_ASSIST.SIMD_OUTPUT
+.Pq Event CAH , Umask 08H
+Number of SIMD FP assists due to Output values.
+.It Li FP_ASSIST.SIMD_INPUT
+.Pq Event CAH , Umask 10H
+Number of SIMD FP assists due to input values.
+.It Li FP_ASSIST.ANY
+.Pq Event CAH , Umask 1EH
+Cycles with any input/output SSE* or FP assists.
+.It Li ROB_MISC_EVENTS.LBR_INSERTS
+.Pq Event CCH , Umask 20H
+Count cases of saving new LBR records by hardware.
+.It Li MEM_TRANS_RETIRED.LOAD_LATENCY
+.Pq Event CDH , Umask 01H
+Sample loads with specified latency threshold.
+PMC3 only.
+Specify threshold in MSR 0x3F6.
+.It Li MEM_TRANS_RETIRED.PRECISE_STORE
+.Pq Event CDH , Umask 02H
+Sample stores and collect precise store operation via PEBS record.
+PMC3 only.
+.It Li MEM_UOP_RETIRED.LOADS
+.Pq Event D0H , Umask 01H
+Qualify retired memory uops that are loads. Combine with umask 10H, 20H,
+40H, 80H.
+Supports PEBS.
+.It Li MEM_UOP_RETIRED.STORES
+.Pq Event D0H , Umask 02H
+Qualify retired memory uops that are stores. Combine with umask 10H, 20H,
+40H, 80H.
+.It Li MEM_UOP_RETIRED.STLB_MISS
+.Pq Event D0H , Umask 10H
+Qualify retired memory uops with STLB miss. Must combine with umask 01H,
+02H, to produce counts.
+.It Li MEM_UOP_RETIRED.LOCK
+.Pq Event D0H , Umask 20H
+Qualify retired memory uops with lock. Must combine with umask 01H, 02H, to
+produce counts.
+.It Li MEM_UOP_RETIRED.SPLIT
+.Pq Event D0H , Umask 40H
+Qualify retired memory uops with line split. Must combine with umask 01H,
+02H, to produce counts.
+.It Li MEM_UOP_RETIRED.ALL
+.Pq Event D0H , Umask 80H
+Qualify any retired memory uops. Must combine with umask 01H, 02H, to
+produce counts.
+.It Li MEM_LOAD_UOPS_RETIRED.L1_HIT
+.Pq Event D1H , Umask 01H
+Retired load uops with L1 cache hits as data sources.
+Supports PEBS.
+.It Li MEM_LOAD_UOPS_RETIRED.L2_HIT
+.Pq Event D1H , Umask 02H
+Retired load uops with L2 cache hits as data sources.
+.It Li MEM_LOAD_UOPS_RETIRED.LLC_HIT
+.Pq Event D1H , Umask 04H
+Retired load uops with LLC cache hits as data sources.
+.It Li MEM_LOAD_UOPS_RETIRED.HIT_LFB
+.Pq Event D1H , Umask 40H
+Retired load uops which data sources were load uops missed L1 but hit FB due
+to preceding miss to the same cache line with data not ready.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS
+.Pq Event D2H , Umask 01H
+Retired load uops which data sources were LLC hit and cross-core snoop
+missed in on-pkg core cache.
+Supports PEBS.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT
+.Pq Event D2H , Umask 02H
+Retired load uops which data sources were LLC and cross-core snoop hits in
+on-pkg core cache.
+Supports PEBS.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM
+.Pq Event D2H , Umask 04H
+Retired load uops which data sources were HitM responses from shared LLC.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE
+.Pq Event D2H , Umask 08H
+Retired load uops which data sources were hits in LLC without snoops
+required.
+.It Li MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM
+.Pq Event D3H , Umask 01H
+Retired load uops which data sources missed LLC but serviced from local
+dram.
+Supports PEBS.
+.It Li L2_TRANS.DEMAND_DATA_RD
+.Pq Event F0H , Umask 01H
+Demand Data Read requests that access L2 cache.
+.It Li L2_TRANS.RFO
+.Pq Event F0H , Umask 02H
+RFO requests that access L2 cache.
+.It Li L2_TRANS.CODE_RD
+.Pq Event F0H , Umask 04H
+L2 cache accesses when fetching instructions.
+.It Li L2_TRANS.ALL_PF
+.Pq Event F0H , Umask 08H
+Any MLC or LLC HW prefetch accessing L2, including rejects.
+.It Li L2_TRANS.L1D_WB
+.Pq Event F0H , Umask 10H
+L1D writebacks that access L2 cache.
+.It Li L2_TRANS.L2_FILL
+.Pq Event F0H , Umask 20H
+L2 fill requests that access L2 cache.
+.It Li L2_TRANS.L2_WB
+.Pq Event F0H , Umask 40H
+L2 writebacks that access L2 cache.
+.It Li L2_TRANS.ALL_REQUESTS
+.Pq Event F0H , Umask 80H
+Transactions accessing L2 pipe.
+.It Li L2_LINES_IN.I
+.Pq Event F1H , Umask 01H
+L2 cache lines in I state filling L2.
+Counting does not cover rejects.
+.It Li L2_LINES_IN.S
+.Pq Event F1H , Umask 02H
+L2 cache lines in S state filling L2.
+Counting does not cover rejects.
+.It Li L2_LINES_IN.E
+.Pq Event F1H , Umask 04H
+L2 cache lines in E state filling L2.
+Counting does not cover rejects.
+.It Li L2_LINES_IN.ALL
+.Pq Event F1H , Umask 07H
+L2 cache lines filling L2.
+Counting does not cover rejects.
+.It Li L2_LINES_OUT.DEMAND_CLEAN
+.Pq Event F2H , Umask 01H
+Clean L2 cache lines evicted by demand.
+.It Li L2_LINES_OUT.DEMAND_DIRTY
+.Pq Event F2H , Umask 02H
+Dirty L2 cache lines evicted by demand.
+.It Li L2_LINES_OUT.PF_CLEAN
+.Pq Event F2H , Umask 04H
+Clean L2 cache lines evicted by the MLC prefetcher.
+.It Li L2_LINES_OUT.PF_DIRTY
+.Pq Event F2H , Umask 08H
+Dirty L2 cache lines evicted by the MLC prefetcher.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.sandybridge 3 ,
+.Xr pmc.sandybridgeuc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.soft 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
+The support for the Ivy Bridge
+microarchitecture was written by
+.An "Fabien Thomas"
+.Aq fabient@FreeBSD.org .
diff --git a/lib/libpmc/pmc.sandybridge.3 b/lib/libpmc/pmc.sandybridge.3
index f4a025502bbe..8572302efeea 100644
--- a/lib/libpmc/pmc.sandybridge.3
+++ b/lib/libpmc/pmc.sandybridge.3
@@ -25,11 +25,11 @@
.\" $FreeBSD$
.\"
.Dd February 12, 2012
-.Dt PMC.SANDYBRIDGE 3
-.Os
+.Dt PMC.SANDYBRIDGE 3
+.Os
.Sh NAME
.Nm pmc.sandybridge
-.Nd measurement events for
+.Nd measurement events for
.Tn Intel
.Tn Sandy Bridge
family CPUs
@@ -56,10 +56,10 @@ These PMCs are documented in
.El
.Pp
The number of PMCs available in each class and their widths need to be
-determined at run time by calling
+determined at run time by calling
.Xr pmc_cpuinfo 3 .
.Pp
-Intel Sandy Bridge PMCs are documented in
+Intel Sandy Bridge PMCs are documented in
.Rs
.%B "Intel(R) 64 and IA-32 Architectures Software Developers Manual"
.%T "Volume 3B: System Programming Guide, Part 2"
@@ -75,7 +75,7 @@ The programmable PMCs support the following capabilities:
.Bl -column "PMC_CAP_INTERRUPT" "Support"
.It Em Capability Ta Em Support
.It PMC_CAP_CASCADE Ta \&No
-.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_EDGE Ta Yes
.It PMC_CAP_INTERRUPT Ta Yes
.It PMC_CAP_INVERT Ta Yes
.It PMC_CAP_READ Ta Yes
@@ -93,55 +93,80 @@ qualifiers:
.It Li rsp= Ns Ar value
Configure the Off-core Response bits.
.Bl -tag -width indent
-.It Li DMND_DATA_RD
-Counts the number of demand and DCU prefetch data reads of full
-and partial cachelines as well as demand data page table entry
-cacheline reads.
-Does not count L2 data read prefetches or instruction fetches.
-.It Li DMND_RFO
-Counts the number of demand and DCU prefetch reads for ownership
-(RFO) requests generated by a write to data cacheline.
-Does not count L2 RFO.
-.It Li DMND_IFETCH
-Counts the number of demand and DCU prefetch instruction cacheline
-reads.
+.It Li REQ_DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full and partial
+cachelines as well as demand data page table entry cacheline reads. Does not
+count L2 data read prefetches or instruction fetches.
+.It Li REQ_DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership (RFO)
+requests generated by a write to data cacheline. Does not count L2 RFO
+prefetches.
+.It Li REQ_DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline reads.
Does not count L2 code read prefetches.
-.It Li WB
+.It Li REQ_WB
Counts the number of writeback (modified to exclusive) transactions.
-.It Li PF_DATA_RD
+.It Li REQ_PF_DATA_RD
Counts the number of data cacheline reads generated by L2 prefetchers.
-.It Li PF_RFO
+.It Li REQ_PF_RFO
Counts the number of RFO requests generated by L2 prefetchers.
-.It Li PF_IFETCH
+.It Li REQ_PF_IFETCH
Counts the number of code reads generated by L2 prefetchers.
-.It Li OTHER
-Counts one of the following transaction types, including L3 invalidate,
-I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
-lock, unlock, split lock.
-.It Li UNCORE_HIT
-L3 Hit: local or remote home requests that hit L3 cache in the uncore
-with no coherency actions required (snooping).
-.It Li OTHER_CORE_HIT_SNP
-L3 Hit: local or remote home requests that hit L3 cache in the uncore
-and was serviced by another core with a cross core snoop where no modified
-copies were found (clean).
-.It Li OTHER_CORE_HITM
-L3 Hit: local or remote home requests that hit L3 cache in the uncore
-and was serviced by another core with a cross core snoop where modified
-copies were found (HITM).
-.It Li REMOTE_CACHE_FWD
-L3 Miss: local homed requests that missed the L3 cache and was serviced
-by forwarded data following a cross package snoop where no modified
-copies found.
-(Remote home requests are not counted)
-.It Li REMOTE_DRAM
-L3 Miss: remote home requests that missed the L3 cache and were serviced
-by remote DRAM.
-.It Li LOCAL_DRAM
-L3 Miss: local home requests that missed the L3 cache and were serviced
-by local DRAM.
-.It Li NON_DRAM
-Non-DRAM requests that were serviced by IOH.
+.It Li REQ_PF_LLC_DATA_RD
+L2 prefetcher to L3 for loads.
+.It Li REQ_PF_LLC_RFO
+RFO requests generated by L2 prefetcher.
+.It Li REQ_PF_LLC_IFETCH
+L2 prefetcher to L3 for instruction fetches.
+.It Li REQ_BUS_LOCKS
+Bus lock and split lock requests.
+.It Li REQ_STRM_ST
+Streaming store requests.
+.It Li REQ_OTHER
+Any other request that crosses IDI, including I/O.
+.It Li RES_ANY
+Catch all value for any response types.
+.It Li RES_SUPPLIER_NO_SUPP
+No Supplier Information available.
+.It Li RES_SUPPLIER_LLC_HITM
+M-state initial lookup stat in L3.
+.It Li RES_SUPPLIER_LLC_HITE
+E-state.
+.It Li RES_SUPPLIER_LLC_HITS
+S-state.
+.It Li RES_SUPPLIER_LLC_HITF
+F-state.
+.It Li RES_SUPPLIER_LOCAL
+Local DRAM Controller.
+.It Li RES_SNOOP_SNPI_NONE
+No details on snoop-related information.
+.It Li RES_SNOOP_SNP_NO_NEEDED
+No snoop was needed to satisfy the request.
+.It Li RES_SNOOP_SNP_MISS
+A snoop was needed and it missed all snooped caches:
+-For LLC Hit, RspIHitI was returned by all cores
+-For LLC Miss, RspI was returned by all sockets and data was returned from
+DRAM.
+.It Li RES_SNOOP_HIT_NO_FWD
+A snoop was needed and it hits in at least one snooped cache. Hit denotes a
+cache-line was valid before snoop effect. This includes:
+-Snoop Hit w/ Invalidation (LLC Hit, RFO)
+-Snoop Hit, Left Shared (LLC Hit/Miss, IFetch/Data_RD)
+-Snoop Hit w/ Invalidation and No Forward (LLC Miss, RFO Hit S)
+In the LLC Miss case, data is returned from DRAM.
+.It Li RES_SNOOP_HIT_FWD
+A snoop was needed and data was forwarded from a remote socket.
+This includes:
+-Snoop Forward Clean, Left Shared (LLC Hit/Miss, IFetch/Data_RD/RFT).
+.It Li RES_SNOOP_HITM
+A snoop was needed and it HitM-ed in local or remote cache. HitM denotes a
+cache-line was in modified state before effect as a result of snoop. This
+includes:
+-Snoop HitM w/ WB (LLC miss, IFetch/Data_RD)
+-Snoop Forward Modified w/ Invalidation (LLC Hit/Miss, RFO)
+-Snoop MtoS (LLC Hit, IFetch/Data_RD).
+.It Li RES_NON_DRAM
+Target was non-DRAM system address. This includes MMIO transactions.
.El
.It Li cmask= Ns Ar value
Configure the PMC to increment only if the number of configured
@@ -177,276 +202,276 @@ qualifiers are specified, the default is to enable both.
Sandy Bridge programmable PMCs support the following events:
.Bl -tag -width indent
.It Li LD_BLOCKS.DATA_UNKNOWN
-.Pq EVENT_03H, Umask 01H
+.Pq Event 03H, Umask 01H
Blocked loads due to store buffer blocks with unknown data.
.It Li LD_BLOCKS.STORE_FORWARD
-.Pq Event 03H, Umask 02H
+.Pq Event 03H, Umask 02H
Loads blocked by overlapping with store buffer that cannot be forwarded.
.It Li LD_BLOCKS.NO_SR
-.Pq Event 03H, Umask 08H
+.Pq Event 03H, Umask 08H
# of Split loads blocked due to resource not available.
-.It Li LD_BLOCKS.ALL_BLOCK
-.Pq EVENT_03H, Umask 10H
+.It Li LD_BLOCKS.ALL_BLOCK
+.Pq Event 03H, Umask 10H
Number of cases where any load is blocked but has no DCU miss.
-.It Li MISALIGN_MEM_REF.LOADS
-.Pq Event 05H, Umask 01H
+.It Li MISALIGN_MEM_REF.LOADS
+.Pq Event 05H, Umask 01H
Speculative cache-line split load uops dispatched to L1D.
.It Li MISALIGN_MEM_REF.STORES
-.Pq Event 05H, Umask 02H
+.Pq Event 05H, Umask 02H
Speculative cache-line split Store-address uops dispatched to L1D.
.It Li LD_BLOCKS_PARTIAL.ADDRESS_ALIAS
-.Pq Event 07H, Umask 01H
+.Pq Event 07H, Umask 01H
False dependencies in MOB due to partial compare on address.
-.It Li LD_BLOCKS_PARTIAL.ALL_STA_BLOCK
-.Pq Event 07H, Umask 08H
-The number of times that load operations are temporarily blocked because of
-older stores, with addresses that are not yet known.
+.It Li LD_BLOCKS_PARTIAL.ALL_STA_BLOCK
+.Pq Event 07H, Umask 08H
+The number of times that load operations are temporarily blocked because of
+older stores, with addresses that are not yet known.
A load operation may incur more than one block of this type.
.It LI DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK
-.Pq Event 08H, Umask 01H
+.Pq Event 08H, Umask 01H
Misses in all TLB levels that cause a page walk of any page size.
-.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
-.Pq Event 08H, Umask 02H
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H, Umask 02H
Misses in all TLB levels that caused page walk completed of any size.
-.It Li DTLB_LOAD_MISSES.WALK_DURATION
-.Pq Event 08H, Umask 04H
+.It Li DTLB_LOAD_MISSES.WALK_DURATION
+.Pq Event 08H, Umask 04H
Cycle PMH is busy with a walk.
.It Li DTLB_LOAD_MISSES.STLB_HIT
-.Pq Event 08H, Umask 10H
-Number of cache load STLB hits.
+.Pq Event 08H, Umask 10H
+Number of cache load STLB hits.
No page walk.
-.It Li INT_MISC.RECOVERY_CYCLES
-.Pq Event 0DH, Umask 03H
-Cycles waiting to recover after Machine Clears or JEClear.
+.It Li INT_MISC.RECOVERY_CYCLES
+.Pq Event 0DH, Umask 03H
+Cycles waiting to recover after Machine Clears or JEClear.
Set Cmask = 1.
Set Edge to count occurrences
-.It Li INT_MISC.RAT_STALL_CYCLES
-.Pq Event 0DH, Umask 40H
+.It Li INT_MISC.RAT_STALL_CYCLES
+.Pq Event 0DH, Umask 40H
Cycles RAT external stall is sent to IDQ for this thread.
.It Li UOPS_ISSUED.ANY
-.Pq Event 0EH, Umask 01H
-Increments each cycle the # of Uops issued by the RAT to RS.
+.Pq Event 0EH, Umask 01H
+Increments each cycle the # of Uops issued by the RAT to RS.
Set Cmask = 1, Inv = 1, Any= 1 to count stalled cycles of this core.
Set Cmask = 1, Inv = 1 to count stalled cycles
-.It Li FP_COMP_OPS_EXE.X87
-.Pq Event 10H, Umask 01H
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H, Umask 01H
Counts number of X87 uops executed.
-.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED_DOUBLE
-.Pq Event 10H, Umask 10H
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED_DOUBLE
+.Pq Event 10H, Umask 10H
Counts number of SSE* double precision FP packed uops executed.
.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR_SINGLE
-.Pq Event 10H, Umask 20H
+.Pq Event 10H, Umask 20H
Counts number of SSE* single precision FP scalar uops executed.
.It Li FP_COMP_OPS_EXE.SSE_PACKED_SINGLE
.Pq Event 10H, Umask 40H
Counts number of SSE* single precision FP packed uops executed.
-.It LiFP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE
+.It Li FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE
.Pq Event 10H, Umask 80H
Counts number of SSE* double precision FP scalar uops executed.
.It Li SIMD_FP_256.PACKED_SINGLE
-.Pq Event 11H, Umask 01H
+.Pq Event 11H, Umask 01H
Counts 256-bit packed single-precision floating-point instructions.
.It Li SIMD_FP_256.PACKED_DOUBLE
.Pq Event 11H, Umask 02H
Counts 256-bit packed double-precision floating-point instructions.
.It Li ARITH.FPU_DIV_ACTIVE
-.Pq Event 14H, Umask 01H
+.Pq Event 14H, Umask 01H
Cycles that the divider is active, includes INT and FP.
Set 'edge =1, cmask=1' to count the number of divides.
-.It Li INSTS_WRITTEN_TO_IQ.INSTS
-.Pq Event 17H, Umask 01H
+.It Li INSTS_WRITTEN_TO_IQ.INSTS
+.Pq Event 17H, Umask 01H
Counts the number of instructions written into the IQ every cycle.
-.It Li L2_RQSTS.DEMAND_DATA_RD_HIT
-.Pq Event 24H, Umask 01H
+.It Li L2_RQSTS.DEMAND_DATA_RD_HIT
+.Pq Event 24H, Umask 01H
Demand Data Read requests that hit L2 cache.
.It Li L2_RQSTS.ALL_DEMAND_DATA_RD
-.Pq Event 24H, Umask 03H
+.Pq Event 24H, Umask 03H
Counts any demand and L1 HW prefetch data load requests to L2.
.It Li L2_RQSTS.RFO_HITS
.Pq Event 24H, Umask 04H
Counts the number of store RFO requests that hit the L2 cache.
-.It Li L2_RQSTS.RFO_MISS
+.It Li L2_RQSTS.RFO_MISS
.Pq Event 24H, Umask 08H
Counts the number of store RFO requests that miss the L2 cache.
-.It Li L2_RQSTS.ALL_RFO
-.Pq Event 24H, Umask 0CH
+.It Li L2_RQSTS.ALL_RFO
+.Pq Event 24H, Umask 0CH
Counts all L2 store RFO requests.
-.It Li L2_RQSTS.CODE_RD_HIT
+.It Li L2_RQSTS.CODE_RD_HIT
.Pq Event 24H, Umask 10H
Number of instruction fetches that hit the L2 cache.
-.It Li L2_RQSTS.CODE_RD_MISS
+.It Li L2_RQSTS.CODE_RD_MISS
.Pq Event 24H, Umask 20H
Number of instruction fetches that missed the L2 cache.
-.It Li L2_RQSTS.ALL_CODE_RD
-.Pq Event 24H, Umask 30H
+.It Li L2_RQSTS.ALL_CODE_RD
+.Pq Event 24H, Umask 30H
Counts all L2 code requests.
-.It Li L2_RQSTS.PF_HIT
-.Pq Event 24H, Umask 40H
+.It Li L2_RQSTS.PF_HIT
+.Pq Event 24H, Umask 40H
Requests from L2 Hardware prefetcher that hit L2.
-.It Li L2_RQSTS.PF_MISS
+.It Li L2_RQSTS.PF_MISS
.Pq Event 24H, Umask 80H
Requests from L2 Hardware prefetcher that missed L2.
-.It Li L2_RQSTS.ALL_PF
+.It Li L2_RQSTS.ALL_PF
.Pq Event 24H, Umask C0H
Any requests from L2 Hardware prefetchers.
-.It Li L2_STORE_LOCK_RQSTS.MISS
-.Pq Event 27H, Umask 01H
+.It Li L2_STORE_LOCK_RQSTS.MISS
+.Pq Event 27H, Umask 01H
RFOs that miss cache lines.
-.It Li L2_STORE_LOCK_RQSTS.HIT_E
-.Pq Event 27H, Umask 04H
+.It Li L2_STORE_LOCK_RQSTS.HIT_E
+.Pq Event 27H, Umask 04H
RFOs that hit cache lines in E state.
.It Li L2_STORE_LOCK_RQSTS.HIT_M
.Pq EVENT_27H, Umask 08H
RFOs that hit cache lines in M state.
-.It Li L2_STORE_LOCK_RQSTS.ALL
-.Pq EVENT_27H, Umask 0FH
+.It Li L2_STORE_LOCK_RQSTS.ALL
+.Pq Event 27H, Umask 0FH
RFOs that access cache lines in any state.
-.It Li L2_L1D_WB_RQSTS.HIT_E
-.Pq Event 28H, Umask 04H
+.It Li L2_L1D_WB_RQSTS.HIT_E
+.Pq Event 28H, Umask 04H
Not rejected writebacks from L1D to L2 cache lines in E state.
-.It Li L2_L1D_WB_RQSTS.HIT_M
-.Pq Event 28H, Umask 08H
+.It Li L2_L1D_WB_RQSTS.HIT_M
+.Pq Event 28H, Umask 08H
Not rejected writebacks from L1D to L2 cache lines in M state.
-.It Li LONGEST_LAT_CACHE.REFERENCE
+.It Li LONGEST_LAT_CACHE.REFERENCE
.Pq Event 2EH, Umask 4FH
-This event counts requests originating from the core that reference a cache
+This event counts requests originating from the core that reference a cache
line in the last level cache.
-.It Li LONGEST_LAT_CACHE.MISS
-.Pq Event 2EH, Umask 41H
-This event counts each cache miss condition for references to the last level
+.It Li LONGEST_LAT_CACHE.MISS
+.Pq Event 2EH, Umask 41H
+This event counts each cache miss condition for references to the last level
cache.
-.It Li CPU_CLK_UNHALTED.THREAD_P
-.Pq Event 3CH, Umask 00H
-Counts the number of thread cycles while the thread is not in a halt state.
-The thread enters the halt state when it is running the HLT instruction.
-The core frequency may change from time to time due to power or thermal
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH, Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction.
+The core frequency may change from time to time due to power or thermal
throttling.
-.It Li CPU_CLK_THREAD_UNHALTED.REF_XCLK
-.Pq Event 3CH, Umask 01H
+.It Li CPU_CLK_THREAD_UNHALTED.REF_XCLK
+.Pq Event 3CH, Umask 01H
Increments at the frequency of XCLK (100 MHz) when not halted.
-.It Li L1D_PEND_MISS.PENDING
-.Pq Event 48H, Umask 01H
-Increments the number of outstanding L1D misses every cycle.
+.It Li L1D_PEND_MISS.PENDING
+.Pq Event 48H, Umask 01H
+Increments the number of outstanding L1D misses every cycle.
Set Cmask = 1 and Edge =1 to count occurrences.
Counter 2 only; Set Cmask = 1 to count cycles.
-.It Li DTLB_STORE_MISSES.MISS_CAUSES_A_WALK
-.Pq Event 49H, Umask 01H Miss in all TLB levels causes an page walk of any
+.It Li DTLB_STORE_MISSES.MISS_CAUSES_A_WALK
+.Pq Event 49H, Umask 01H
+Miss in all TLB levels causes a page walk of any
page size (4K/2M/4M/1G).
-.It Li DTLB_STORE_MISSES.WALK_COMPLETED
-.Pq Event 49H, Umask 02H
-Miss in all TLB levels causes a page walk that completes of any page size
+.It Li DTLB_STORE_MISSES.WALK_COMPLETED
+.Pq Event 49H, Umask 02H
+Miss in all TLB levels causes a page walk that completes of any page size
(4K/2M/4M/1G).
-.It Li DTLB_STORE_MISSES.WALK_DURATION
-.Pq Event 49H, Umask 04H
+.It Li DTLB_STORE_MISSES.WALK_DURATION
+.Pq Event 49H, Umask 04H
Cycles PMH is busy with this walk.
-.It Li DTLB_STORE_MISSES.STLB_HIT
-.Pq Event 49H, Umask 10H
-Store operations that miss the first TLB level but hit the second and do not
+.It Li DTLB_STORE_MISSES.STLB_HIT
+.Pq Event 49H, Umask 10H
+Store operations that miss the first TLB level but hit the second and do not
cause page walks.
-.It Li LOAD_HIT_PRE.SW_PF
-.Pq Event 4CH, Umask 01H
+.It Li LOAD_HIT_PRE.SW_PF
+.Pq Event 4CH, Umask 01H
Not SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.
-.It Li LOAD_HIT_PER.HW_PF
-.Pq Event 4CH, Umask 02H
+.It Li LOAD_HIT_PRE.HW_PF
+.Pq Event 4CH, Umask 02H
Not SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.
-.It Li HW_PRE_REQ.DL1_MISS
-.Pq Event 4EH, Umask 02H
-Hardware Prefetch requests that miss the L1D cache.
-A request is being counted each time it access the cache & miss it, including
+.It Li HW_PRE_REQ.DL1_MISS
+.Pq Event 4EH, Umask 02H
+Hardware Prefetch requests that miss the L1D cache.
+A request is counted each time it accesses the cache and misses it, including
if a block is applicable or if hit the Fill Buffer for example.
This accounts for both L1 streamer and IP-based (IPP) HW prefetchers.
-.It Li L1D.REPLACEMENT
+.It Li L1D.REPLACEMENT
.Pq Event 51H, Umask 01H
Counts the number of lines brought into the L1 data cache.
-.It Li L1D.ALLOCATED_IN_M
-.Pq Event 51H, Umask 02H
+.It Li L1D.ALLOCATED_IN_M
+.Pq Event 51H, Umask 02H
Counts the number of allocations of modified L1D cache lines.
.It Li L1D.EVICTION
.Pq Event 51H, Umask 04H
-Counts the number of modified lines evicted from the L1 data cache due to
+Counts the number of modified lines evicted from the L1 data cache due to
replacement.
.It Li L1D.ALL_M_REPLACEMENT
-.Pq Event 51H, Umask 08H
-Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line
+.Pq Event 51H, Umask 08H
+Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line
replacement.
.It Li PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP
-.Pq Event 59H, Umask 20H
+.Pq Event 59H, Umask 20H
Increments the number of flags-merge uops in flight each cycle.
Set Cmask = 1 to count cycles.
.It Li PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW
-.Pq Event 59H, Umask 40H
+.Pq Event 59H, Umask 40H
Cycles with at least one slow LEA uop allocated.
.It Li PARTIAL_RAT_STALLS.MUL_SINGLE_UOP
-.Pq Event 59H, Umask 80H
+.Pq Event 59H, Umask 80H
Number of Multiply packed/scalar single precision uops allocated.
-.It Li RESOURCE_STALLS2.ALL_FL_EMPTY
-.Pq Event 5BH, Umask 0CH
+.It Li RESOURCE_STALLS2.ALL_FL_EMPTY
+.Pq Event 5BH, Umask 0CH
Cycles stalled due to free list empty.
-.It Li RESOURCE_STALLS2.ALL_PRF_CONTROL
-.Pq Event 5BH, Umask 0FH
+.It Li RESOURCE_STALLS2.ALL_PRF_CONTROL
+.Pq Event 5BH, Umask 0FH
Cycles stalled due to control structures full for physical registers.
-.It Li RESOURCE_STALLS2.BOB_FULL
-.Pq Event 5BH, Umask 40H
+.It Li RESOURCE_STALLS2.BOB_FULL
+.Pq Event 5BH, Umask 40H
Cycles Allocator is stalled due to Branch Order Buffer.
.It Li RESOURCE_STALLS2.OOO_RSRC
-.Pq Event 5BH, Umask 4FH
+.Pq Event 5BH, Umask 4FH
Cycles stalled due to out of order resources full.
.It Li CPL_CYCLES.RING0
-.Pq Event 5CH, Umask 01H
+.Pq Event 5CH, Umask 01H
Unhalted core cycles when the thread is in ring 0.
Use Edge to count transition
.It Li CPL_CYCLES.RING123
.Pq Event 5CH, Umask 02H
Unhalted core cycles when the thread is not in ring 0.
.It Li RS_EVENTS.EMPTY_CYCLES
-.Pq Event 5EH, Umask 01H
+.Pq Event 5EH, Umask 01H
Cycles the RS is empty for the thread.
.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD
-.Pq Event 60H, Umask 01H
+.Pq Event 60H, Umask 01H
Offcore outstanding Demand Data Read transactions in SQ to uncore.
Set Cmask=1 to count cycles.
.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO
-.Pq Event 60H, Umask 04H
+.Pq Event 60H, Umask 04H
Offcore outstanding RFO store transactions in SQ to uncore.
Set Cmask=1 to count cycles.
.It Li OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD
-.Pq Event 60H, Umask 08H
+.Pq Event 60H, Umask 08H
Offcore outstanding cacheable data read transactions in SQ to uncore.
Set Cmask=1 to count cycles.
.It Li LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION
-.Pq Event 63H, Umask 01H
+.Pq Event 63H, Umask 01H
Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.
.It Li LOCK_CYCLES.CACHE_LOCK_DURATION
.Pq Event 63H, Umask 02H
Cycles in which the L1D is locked.
.It Li IDQ.EMPTY
-.Pq Event 79H, Umask 02H
+.Pq Event 79H, Umask 02H
Counts cycles the IDQ is empty.
-.It Li IQD.MITE_UOPS
-.Pq Event 79H, Umask 04H
-Increment each cycle # of uops delivered to IDQ from MITE path.
+.It Li IDQ.MITE_UOPS
+.Pq Event 79H, Umask 04H
+Increment each cycle # of uops delivered to IDQ from MITE path.
Set Cmask = 1 to count cycles.
Can combine Umask 04H and 20H
.It Li IDQ.DSB_UOPS
-.Pq Event 79H, Umask 08H
-Increment each cycle.
-# of uops delivered to IDQ from DSB path.
+.Pq Event 79H, Umask 08H
+Increment each cycle.
+# of uops delivered to IDQ from DSB path.
Set Cmask = 1 to count cycles.
Can combine Umask 08H and 10H
-.It Li IDQ.MS_DSB_UOPS
-.Pq Event 79H, Umask 10H
-Increment each cycle # of uops delivered to IDQ when MS busy by DSB.
-Set Cmask = 1 to count cycles MS is busy.
+.It Li IDQ.MS_DSB_UOPS
+.Pq Event 79H, Umask 10H
+Increment each cycle # of uops delivered to IDQ when MS busy by DSB.
+Set Cmask = 1 to count cycles MS is busy.
Set Cmask=1 and Edge=1 to count MS activations.
Can combine Umask 08H and 10H
.It Li IDQ.MS_MITE_UOPS
-.Pq Event 79H, Umask 20H
-Increment each cycle # of uops delivered to IDQ when MS is busy by MITE.
+.Pq Event 79H, Umask 20H
+Increment each cycle # of uops delivered to IDQ when MS is busy by MITE.
Set Cmask = 1 to count cycles.
Can combine Umask 04H and 20H
-.It Li IDQ.MS_UOPS
-.Pq Event 79H, Umask 30H
-Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE.
+.It Li IDQ.MS_UOPS
+.Pq Event 79H, Umask 30H
+Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE.
Set Cmask = 1 to count cycles.
Can combine Umask 04H, 08H and 30H
.It Li ICACHE.MISSES
@@ -457,38 +482,38 @@ Includes UC accesses.
.Pq Event 85H, Umask 01H
Misses in all ITLB levels that cause page walks.
.It Li ITLB_MISSES.WALK_COMPLETED
-.Pq Event 85H, Umask 02H
+.Pq Event 85H, Umask 02H
Misses in all ITLB levels that cause completed page walks.
-.It Li ITLB_MISSES.WALK_DURATION
-.Pq Event 85H, Umask 04H
+.It Li ITLB_MISSES.WALK_DURATION
+.Pq Event 85H, Umask 04H
Cycle PMH is busy with a walk.
.It Li ITLB_MISSES.STLB_HIT
-.Pq Event 85H, Umask 10H
-Number of cache load STLB hits.
+.Pq Event 85H, Umask 10H
+Number of cache load STLB hits.
No page walk.
.It Li ILD_STALL.LCP
-.Pq Event 87H, Umask 01H
+.Pq Event 87H, Umask 01H
Stalls caused by changing prefix length of the instruction.
.It Li ILD_STALL.IQ_FULL
-.Pq Event 87H, Umask 04H
+.Pq Event 87H, Umask 04H
Stall cycles due to IQ is full.
.It Li BR_INST_EXEC.COND
-.Pq Event 88H, Umask 01H
-Qualify conditional near branch instructions executed, but not necessarily
+.Pq Event 88H, Umask 01H
+Qualify conditional near branch instructions executed, but not necessarily
retired.
Must combine with umask 40H, 80H
-.It Li BR_INST_EXEC.DIRECT_JMP
-.Pq Event 88H, Umask 02H
-Qualify all unconditional near branch instructions excluding calls and indirect
+.It Li BR_INST_EXEC.DIRECT_JMP
+.Pq Event 88H, Umask 02H
+Qualify all unconditional near branch instructions excluding calls and indirect
branches.
Must combine with umask 80H
.It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET
-.Pq Event 88H, Umask 04H
-Qualify executed indirect near branch instructions that are not calls nor
+.Pq Event 88H, Umask 04H
+Qualify executed indirect near branch instructions that are not calls nor
returns.
Must combine with umask 80H
.It Li BR_INST_EXEC.RETURN_NEAR
-.Pq Event 88H, Umask 08H
+.Pq Event 88H, Umask 08H
Qualify indirect near branches that have a return mnemonic.
Must combine with umask 80H
.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
@@ -497,49 +522,49 @@ Qualify unconditional near call branch instructions, excluding non call branch,
executed.
Must combine with umask 80H
.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
-.Pq Event 88H, Umask 20H
-Qualify indirect near calls, including both register and memory indirect,
+.Pq Event 88H, Umask 20H
+Qualify indirect near calls, including both register and memory indirect,
executed.
Must combine with umask 80H
.It Li BR_INST_EXEC.NONTAKEN
-.Pq Event 88H, Umask 40H
+.Pq Event 88H, Umask 40H
Qualify non-taken near branches executed.
Applicable to umask 01H only
.It Li BR_INST_EXEC.TAKEN
-.Pq Event 88H, Umask 80H
-Qualify taken near branches executed.
+.Pq Event 88H, Umask 80H
+Qualify taken near branches executed.
Must combine with 01H, 02H, 04H, 08H, 10H, 20H
.It Li BR_INST_EXEC.ALL_BRANCHES
-.Pq Event 88H, Umask FFH
+.Pq Event 88H, Umask FFH
Counts all near executed branches (not necessarily retired).
.It Li BR_MISP_EXEC.COND
-.Pq Event 89H, Umask 01H
-Qualify conditional near branch instructions mispredicted.
+.Pq Event 89H, Umask 01H
+Qualify conditional near branch instructions mispredicted.
Must combine with umask 40H, 80H
.It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET
-.Pq Event 89H, Umask 04H
-Qualify mispredicted indirect near branch instructions that are not calls nor
+.Pq Event 89H, Umask 04H
+Qualify mispredicted indirect near branch instructions that are not calls nor
returns.
Must combine with umask 80H
-.It Li BR_MISP_EXEC.RETURN_NEAR
-.Pq Event 89H, Umask 08H
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H, Umask 08H
Qualify mispredicted indirect near branches that have a return mnemonic.
Must combine with umask 80H
.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
-.Pq Event 89H, Umask 10H
-Qualify mispredicted unconditional near call branch instructions, excluding non
+.Pq Event 89H, Umask 10H
+Qualify mispredicted unconditional near call branch instructions, excluding non
call branch, executed.
Must combine with umask 80H
.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
-.Pq Event 89H, Umask 20H
-Qualify mispredicted indirect near calls, including both register and memory
+.Pq Event 89H, Umask 20H
+Qualify mispredicted indirect near calls, including both register and memory
indirect, executed.
Must combine with umask 80H
.It Li BR_MISP_EXEC.NONTAKEN
-.Pq Event 89H, Umask 40H
+.Pq Event 89H, Umask 40H
Qualify mispredicted non-taken near branches executed.
Applicable to umask 01H only
-.It Li BR_MISP_EXEC.TAKEN
+.It Li BR_MISP_EXEC.TAKEN
.Pq Event 89H, Umask 80H
Qualify mispredicted taken near branches executed.
Must combine with 01H, 02H, 04H, 08H, 10H, 20H
@@ -547,7 +572,7 @@ Must combine with 01H,02H, 04H, 08H, 10H, 20H
.Pq Event 89H, Umask FFH
Counts all near executed branches (not necessarily retired).
.It Li IDQ_UOPS_NOT_DELIVERED.CORE
-.Pq Event 9CH, Umask 01H
+.Pq Event 9CH, Umask 01H
Count number of non-delivered uops to RAT per thread.
Use Cmask to qualify uop bandwidth.
.It Li UOPS_DISPATCHED_PORT.PORT_0
@@ -557,28 +582,28 @@ Cycles which a Uop is dispatched on port 0.
.Pq Event A1H, Umask 02H
Cycles which a Uop is dispatched on port 1.
.It Li UOPS_DISPATCHED_PORT.PORT_2_LD
-.Pq Event A1H, Umask 04H
+.Pq Event A1H, Umask 04H
Cycles which a load uop is dispatched on port 2.
.It Li UOPS_DISPATCHED_PORT.PORT_2_STA
.Pq Event A1H, Umask 08H
Cycles which a store address uop is dispatched on port 2.
.It Li UOPS_DISPATCHED_PORT.PORT_2
-.Pq Event A1H, Umask 0CH
+.Pq Event A1H, Umask 0CH
Cycles which a Uop is dispatched on port 2.
.It Li UOPS_DISPATCHED_PORT.PORT_3_LD
-.Pq Event A1H, Umask 10H
+.Pq Event A1H, Umask 10H
Cycles which a load uop is dispatched on port 3.
-.It Li UOPS_DISPATCHED_PORT.PORT_3_STA
-.Pq Event A1H, Umask 20H
+.It Li UOPS_DISPATCHED_PORT.PORT_3_STA
+.Pq Event A1H, Umask 20H
Cycles which a store address uop is dispatched on port 3.
-.It Li UOPS_DISPATCHED_PORT.PORT_3
-.Pq Event A1H, Umask 30H
+.It Li UOPS_DISPATCHED_PORT.PORT_3
+.Pq Event A1H, Umask 30H
Cycles which a Uop is dispatched on port 3.
-.It Li UOPS_DISPATCHED_PORT.PORT_4
+.It Li UOPS_DISPATCHED_PORT.PORT_4
.Pq Event A1H, Umask 40H
Cycles which a Uop is dispatched on port 4.
.It Li UOPS_DISPATCHED_PORT.PORT_5
-.Pq Event A1H, Umask 80H
+.Pq Event A1H, Umask 80H
Cycles which a Uop is dispatched on port 5.
.It Li RESOURCE_STALLS.ANY
.Pq Event A2H, Umask 01H
@@ -587,30 +612,30 @@ Cycles Allocation is stalled due to Resource Related reason.
.Pq Event A2H, Umask 02H
Counts the cycles of stall due to lack of load buffers.
.It Li RESOURCE_STALLS.RS
-.Pq Event A2H, Umask 04H
+.Pq Event A2H, Umask 04H
Cycles stalled due to no eligible RS entry available.
.It Li RESOURCE_STALLS.SB
-.Pq Event A2H, Umask 08H
-Cycles stalled due to no store buffers available.
+.Pq Event A2H, Umask 08H
+Cycles stalled due to no store buffers available.
(not including draining from sync)
.It Li RESOURCE_STALLS.ROB
.Pq Event A2H, Umask 10H
Cycles stalled due to re-order buffer full.
.It Li RESOURCE_STALLS.FCSW
-.Pq Event A2H, Umask 20H
+.Pq Event A2H, Umask 20H
Cycles stalled due to writing the FPU control word.
.It Li RESOURCE_STALLS.MXCSR
.Pq Event A2H, Umask 40H
-Cycles stalled due to the MXCSR register rename occurring to close to a previous
+Cycles stalled due to the MXCSR register rename occurring too close to a previous
MXCSR rename.
.It Li RESOURCE_STALLS.OTHER
-.Pq Event A2H, Umask 80H
+.Pq Event A2H, Umask 80H
Cycles stalled while execution was stalled due to other resource issues.
.It Li DSB2MITE_SWITCHES.COUNT
.Pq Event ABH, Umask 01H
Number of DSB to MITE switches.
.It Li DSB2MITE_SWITCHES.PENALTY_CYCLES
-.Pq Event ABH, Umask 02H
+.Pq Event ABH, Umask 02H
Cycles DSB to MITE switches caused delay.
.It Li DSB_FILL.OTHER_CANCEL
.Pq Event ACH, Umask 02H
@@ -619,43 +644,43 @@ Cases of cancelling valid DSB fill not because of exceeding way limit.
.Pq Event ACH, Umask 08H
DSB Fill encountered > 3 DSB lines.
.It Li DSB_FILL.ALL_CANCEL
-.Pq Event ACH, Umask 0AH
-Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding
+.Pq Event ACH, Umask 0AH
+Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding
way limit.
.It Li ITLB.ITLB_FLUSH
.Pq Event AEH, Umask 01H
Counts the number of ITLB flushes, includes 4k/2M/4M pages.
.It Li OFFCORE_REQUESTS.DEMAND_DATA_RD
-.Pq Event B0H, Umask 01H
+.Pq Event B0H, Umask 01H
Demand data read requests sent to uncore.
-.It Li OFFCORE_REQUESTS.DEMAND_RFO
-.Pq Event B0H, Umask 04H
+.It Li OFFCORE_REQUESTS.DEMAND_RFO
+.Pq Event B0H, Umask 04H
Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.
.It Li OFFCORE_REQUESTS.ALL_DATA_RD
-.Pq Event B0H, Umask 08H
+.Pq Event B0H, Umask 08H
Data read requests sent to uncore (demand and prefetch).
.It Li UOPS_DISPATCHED.THREAD
-.Pq Event B1H, Umask 01H
+.Pq Event B1H, Umask 01H
Counts total number of uops to be dispatched per-thread each cycle.
Set Cmask = 1, INV =1 to count stall cycles.
.It Li UOPS_DISPATCHED.CORE
-.Pq Event B1H, Umask 02H
+.Pq Event B1H, Umask 02H
Counts total number of uops to be dispatched per-core each cycle.
Do not need to set ANY
.It Li OFFCORE_REQUESTS_BUFFER.SQ_FULL
-.Pq Event B2H, Umask 01H
+.Pq Event B2H, Umask 01H
Offcore requests buffer cannot take more entries for this thread core.
.It Li AGU_BYPASS_CANCEL.COUNT
-.Pq Event B6H, Umask 01H
-Counts executed load operations with all the following traits: 1. addressing
-of the format [base + offset], 2. the offset is between 1 and 2047, 3. the
-address specified in the base register is in one page and the address
+.Pq Event B6H, Umask 01H
+Counts executed load operations with all the following traits: 1. addressing
+of the format [base + offset], 2. the offset is between 1 and 2047, 3. the
+address specified in the base register is in one page and the address
[base+offset] is in another page.
-.It Li OFF_CORE_RESPONSE_0
+.It Li OFF_CORE_RESPONSE_0
.Pq Event B7H, Umask 01H
Off-core Response Performance Monitoring; PMC0 only.
Requires programming MSR 01A6H
-.It Li OFF_CORE_RESPONSE_1
+.It Li OFF_CORE_RESPONSE_1
.Pq Event BBH, Umask 01H
Off-core Response Performance Monitoring. PMC3 only.
Requires programming MSR 01A7H
@@ -663,96 +688,96 @@ Requires programming MSR 01A7H
.Pq Event BDH, Umask 01H
DTLB flush attempts of the thread-specific entries.
.It Li TLB_FLUSH.STLB_ANY
-.Pq Event BDH, Umask 20H
+.Pq Event BDH, Umask 20H
Count number of STLB flush attempts.
.It Li L1D_BLOCKS.BANK_CONFLICT_CYCLES
-.Pq Event BFH, Umask 05H
+.Pq Event BFH, Umask 05H
Cycles when dispatched loads are cancelled due to L1D bank conflicts with other
load ports.
cmask=1
.It Li INST_RETIRED.ANY_P
-.Pq Event C0H, Umask 00H
+.Pq Event C0H, Umask 00H
Number of instructions at retirement.
.It Li INST_RETIRED.PREC_DIST
.Pq Event C0H, Umask 01H
-Precise instruction retired event with HW to reduce effect of PEBS shadow in IP
+Precise instruction retired event with HW to reduce effect of PEBS shadow in IP
distribution. PMC1 only; must quiesce other PMCs.
.It Li INST_RETIRED.X87
-.Pq Event C0H, Umask 02H
+.Pq Event C0H, Umask 02H
X87 instruction retired event.
.It Li OTHER_ASSISTS.ITLB_MISS_RETIRED
-.Pq Event C1H, Umask 02H
+.Pq Event C1H, Umask 02H
Instructions that experienced an ITLB miss.
.It Li OTHER_ASSISTS.AVX_STORE
-.Pq Event C1H, Umask 08H
+.Pq Event C1H, Umask 08H
Number of assists associated with 256-bit AVX store operations.
.It Li OTHER_ASSISTS.AVX_TO_SSE
-.Pq Event C1H, Umask 10H
+.Pq Event C1H, Umask 10H
Number of transitions from AVX256 to legacy SSE when penalty applicable.
.It Li OTHER_ASSISTS.SSE_TO_AVX
-.Pq Event C1H, Umask 20H
+.Pq Event C1H, Umask 20H
Number of transitions from SSE to AVX-256 when penalty applicable.
.It Li UOPS_RETIRED.ALL
-.Pq Event C2H, Umask 01H
+.Pq Event C2H, Umask 01H
Counts the number of micro-ops retired.
Use cmask=1 and invert to count active cycles or stalled cycles.
.It Li UOPS_RETIRED.RETIRE_SLOTS
-.Pq Event C2H, Umask 02H
+.Pq Event C2H, Umask 02H
Counts the number of retirement slots used each cycle.
.It Li MACHINE_CLEARS.MEMORY_ORDERING
-.Pq Event C3H, Umask 02H
+.Pq Event C3H, Umask 02H
Counts the number of machine clears due to memory order conflicts.
-.It Li MACHINE_CLEARS.SMC
+.It Li MACHINE_CLEARS.SMC
.Pq Event C3H, Umask 04H
Counts the number of times that a program writes to a code section.
.It Li MACHINE_CLEARS.MASKMOV
.Pq Event C3H, Umask 20H
-Counts the number of executed AVX masked load operations that refer to an
+Counts the number of executed AVX masked load operations that refer to an
illegal address range with the mask bits set to 0.
.It Li BR_INST_RETIRED.ALL_BRANCH
.Pq Event C4H, Umask 00H
Branch instructions at retirement.
.It Li BR_INST_RETIRED.CONDITIONAL
-.Pq Event C4H, Umask 01H
+.Pq Event C4H, Umask 01H
Counts the number of conditional branch instructions retired.
.It Li BR_INST_RETIRED.NEAR_CALL
-.Pq Event C4H, Umask 02H
+.Pq Event C4H, Umask 02H
Direct and indirect near call instructions retired.
.It Li BR_INST_RETIRED.ALL_BRANCHES
.Pq Event C4H, Umask 04H
Counts the number of branch instructions retired.
.It Li BR_INST_RETIRED.NEAR_RETURN
-.Pq Event C4H, Umask 08H
+.Pq Event C4H, Umask 08H
Counts the number of near return instructions retired.
.It Li BR_INST_RETIRED.NOT_TAKEN
-.Pq Event C4H, Umask 10H
+.Pq Event C4H, Umask 10H
Counts the number of not taken branch instructions retired.
.It Li BR_INST_RETIRED.NEAR_TAKEN
-.Pq Event C4H, Umask 20H
+.Pq Event C4H, Umask 20H
Number of near taken branches retired.
.It Li BR_INST_RETIRED.FAR_BRANCH
-.Pq Event C4H, Umask 40H
+.Pq Event C4H, Umask 40H
Number of far branches retired.
.It Li BR_MISP_RETIRED.ALL_BRANCHES
-.Pq Event C5H, Umask 00H
+.Pq Event C5H, Umask 00H
Mispredicted branch instructions at retirement.
.It Li BR_MISP_RETIRED.CONDITIONAL
-.Pq Event C5H, Umask 01H
+.Pq Event C5H, Umask 01H
Mispredicted conditional branch instructions retired.
.It Li BR_MISP_RETIRED.NEAR_CALL
-.Pq Event C5H, Umask 02H
+.Pq Event C5H, Umask 02H
Direct and indirect mispredicted near call instructions retired.
.It Li BR_MISP_RETIRED.ALL_BRANCH
-.Pq Event C5H, Umask 04H
+.Pq Event C5H, Umask 04H
Mispredicted macro branch instructions retired.
.It Li BR_MISP_RETIRED.NOT_TAKEN
-.Pq Event C5H, Umask 10H
+.Pq Event C5H, Umask 10H
Mispredicted not taken branch instructions retired.
.It Li BR_MISP_RETIRED.TAKEN
-.Pq Event C5H, Umask 20H
+.Pq Event C5H, Umask 20H
Mispredicted taken branch instructions retired.
.It Li FP_ASSIST.X87_OUTPUT
-.Pq Event CAH, Umask 02H
+.Pq Event CAH, Umask 02H
Number of X87 assists due to output value.
.It Li FP_ASSIST.X87_INPUT
.Pq Event CAH, Umask 04H
@@ -761,30 +786,30 @@ Number of X87 assists due to input value.
.Pq Event CAH, Umask 08H
Number of SIMD FP assists due to Output values.
.It Li FP_ASSIST.SIMD_INPUT
-.Pq Event CAH, Umask 10H
+.Pq Event CAH, Umask 10H
Number of SIMD FP assists due to input values.
.It Li FP_ASSIST.ANY
-.Pq Event CAH, Umask 1EH
+.Pq Event CAH, Umask 1EH
Cycles with any input/output SSE* or FP assists.
.It Li ROB_MISC_EVENTS.LBR_INSERTS
-.Pq Event CCH, Umask 20H
+.Pq Event CCH, Umask 20H
Count cases of saving new LBR records by hardware.
.It Li MEM_TRANS_RETIRED.LOAD_LATENCY
.Pq Event CDH, Umask 01H
-Sample loads with specified latency threshold.
-PMC3 only.
+Sample loads with specified latency threshold.
+PMC3 only.
Specify threshold in MSR 0x3F6.
.It Li MEM_TRANS_RETIRED.PRECISE_STORE
-.Pq Event CDH, Umask 02H
-Sample stores and collect precise store operation via PEBS record.
+.Pq Event CDH, Umask 02H
+Sample stores and collect precise store operation via PEBS record.
PMC3 only.
.It Li MEM_UOP_RETIRED.LOADS
.Pq Event D0H, Umask 01H
-Qualify retired memory uops that are loads.
+Qualify retired memory uops that are loads.
Combine with umask 10H, 20H, 40H, 80H.
.It Li MEM_UOP_RETIRED.STORES
.Pq Event D0H, Umask 02H
-Qualify retired memory uops that are stores.
+Qualify retired memory uops that are stores.
Combine with umask 10H, 20H, 40H, 80H.
.It Li MEM_UOP_RETIRED.STLB_MISS
.Pq Event D0H, Umask 10H
@@ -797,101 +822,101 @@ Must combine with umask 01H, 02H, to produce counts.
.It Li MEM_UOP_RETIRED.SPLIT
.Pq Event D0H, Umask 40H
Qualify retired memory uops with line split.
-Must combine with umask 01H, 02H, to produce counts.
+Must combine with umask 01H, 02H, to produce counts.
.It Li MEM_UOP_RETIRED.ALL
-.Pq Event D0H, Umask 80H
+.Pq Event D0H, Umask 80H
Qualify any retired memory uops.
Must combine with umask 01H, 02H, to produce counts.
.It Li MEM_LOAD_UOPS_RETIRED.L1_HIT
-.Pq Event D1H, Umask 01H
+.Pq Event D1H, Umask 01H
Retired load uops with L1 cache hits as data sources.
Must combine with umask 01H, 02H, to produce counts.
.It Li MEM_LOAD_UOPS_RETIRED.L2_HIT
-.Pq Event D1H, Umask 02H
+.Pq Event D1H, Umask 02H
Retired load uops with L2 cache hits as data sources.
.It Li MEM_LOAD_UOPS_RETIRED.LLC_HIT
-.Pq Event D1H, Umask 04H
-Retired load uops which data sources were data hits in LLC without snoops
+.Pq Event D1H, Umask 04H
+Retired load uops which data sources were data hits in LLC without snoops
required.
.It Li MEM_LOAD_UOPS_RETIRED.HIT_LFB
-.Pq Event D1H, Umask 40H
-Retired load uops which data sources were load uops missed L1 but hit FB due
+.Pq Event D1H, Umask 40H
+Retired load uops which data sources were load uops missed L1 but hit FB due
to preceding miss to the same cache line with data not ready.
.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS
-.Pq Event D2H, Umask 01H
-Retired load uops which data sources were LLC hit and cross-core snoop missed in
+.Pq Event D2H, Umask 01H
+Retired load uops which data sources were LLC hit and cross-core snoop missed in
on-pkg core cache.
.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT
-.Pq Event D2H, Umask 02H
-Retired load uops which data sources were LLC and cross-core snoop hits in
+.Pq Event D2H, Umask 02H
+Retired load uops which data sources were LLC and cross-core snoop hits in
on-pkg core cache.
.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM
-.Pq Event D2H, Umask 04H
+.Pq Event D2H, Umask 04H
Retired load uops which data sources were HitM responses from shared LLC.
.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE
.Pq Event D2H, Umask 08H
Retired load uops which data sources were hits in LLC without snoops required.
.It Li MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS
-.Pq Event D4H, Umask 02H
+.Pq Event D4H, Umask 02H
Retired load uops with unknown information as data source in cache serviced the load.
.It Li L2_TRANS.DEMAND_DATA_RD
-.Pq Event F0H, Umask 01H
+.Pq Event F0H, Umask 01H
Demand Data Read requests that access L2 cache.
.It Li L2_TRANS.RFO
-.Pq Event F0H, Umask 02H
+.Pq Event F0H, Umask 02H
RFO requests that access L2 cache.
.It Li L2_TRANS.CODE_RD
-.Pq Event F0H, Umask 04H
+.Pq Event F0H, Umask 04H
L2 cache accesses when fetching instructions.
.It Li L2_TRANS.ALL_PF
-.Pq Event F0H, Umask 08H
+.Pq Event F0H, Umask 08H
L2 or LLC HW prefetches that access L2 cache.
.It Li L2_TRANS.L1D_WB
-.Pq Event F0H, Umask 10H
+.Pq Event F0H, Umask 10H
L1D writebacks that access L2 cache.
.It Li L2_TRANS.L2_FILL
-.Pq Event F0H, Umask 20H
+.Pq Event F0H, Umask 20H
L2 fill requests that access L2 cache.
.It Li L2_TRANS.L2_WB
-.Pq Event F0H, Umask 40H
+.Pq Event F0H, Umask 40H
L2 writebacks that access L2 cache.
.It Li L2_TRANS.ALL_REQUESTS
-.Pq Event F0H, Umask 80H
+.Pq Event F0H, Umask 80H
Transactions accessing L2 pipe.
.It Li L2_LINES_IN.I
.Pq Event F1H, Umask 01H
-L2 cache lines in I state filling L2.
+L2 cache lines in I state filling L2.
Counting does not cover rejects.
.It Li L2_LINES_IN.S
-.Pq Event F1H, Umask 02H
-L2 cache lines in S state filling L2.
+.Pq Event F1H, Umask 02H
+L2 cache lines in S state filling L2.
Counting does not cover rejects.
.It Li L2_LINES_IN.E
-.Pq Event F1H, Umask 04H
-L2 cache lines in E state filling L2.
+.Pq Event F1H, Umask 04H
+L2 cache lines in E state filling L2.
Counting does not cover rejects.
.It Li L2_LINES_IN.ALL
-.Pq Event F1H, Umask 07H
-L2 cache lines filling L2.
+.Pq Event F1H, Umask 07H
+L2 cache lines filling L2.
Counting does not cover rejects.
.It Li L2_LINES_OUT.DEMAND_CLEAN
-.Pq Event F2H, Umask 01H
+.Pq Event F2H, Umask 01H
Clean L2 cache lines evicted by demand.
.It Li L2_LINES_OUT.DEMAND_DIRTY
-.Pq Event F2H, Umask 02H
+.Pq Event F2H, Umask 02H
Dirty L2 cache lines evicted by demand.
.It Li L2_LINES_OUT.PF_CLEAN
.Pq Event F2H, Umask 04H
Clean L2 cache lines evicted by L2 prefetch.
.It Li L2_LINES_OUT.PF_DIRTY
-.Pq Event F2H, Umask 08H
+.Pq Event F2H, Umask 08H
Dirty L2 cache lines evicted by L2 prefetch.
.It Li L2_LINES_OUT.DIRTY_ALL
.Pq Event F2H, Umask 0AH
-Dirty L2 cache lines filling the L2.
+Dirty L2 cache lines filling the L2.
Counting does not cover rejects.
.It Li SQ_MISC.SPLIT_LOCK
-.Pq Event F4H, Umask 10H
+.Pq Event F4H, Umask 10H
Split locks in SQ.
.El
.Sh SEE ALSO
@@ -926,8 +951,7 @@ The
library was written by
.An "Joseph Koshy"
.Aq jkoshy@FreeBSD.org .
-The support for the
-.Lb Sandy Bridge
+The support for the Sandy Bridge
microarchitecture was written by
.An "Davide Italiano"
.Aq davide@FreeBSD.org .