diff options
Diffstat (limited to 'tools/power/x86/turbostat/turbostat.c')
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 1205 |
1 files changed, 608 insertions, 597 deletions
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f2512d78bcbd..5ad45c2ac5bd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -142,6 +142,7 @@ struct msr_counter { #define FLAGS_SHOW (1 << 1) #define SYSFS_PERCPU (1 << 1) }; +static int use_android_msr_path; struct msr_counter bic[] = { { 0x0, "usec", NULL, 0, 0, 0, NULL, 0 }, @@ -209,6 +210,8 @@ struct msr_counter bic[] = { { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "LLCkRPS", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "LLC%hit", NULL, 0, 0, 0, NULL, 0 }, }; /* n.b. bic_names must match the order in bic[], above */ @@ -278,6 +281,8 @@ enum bic_names { BIC_NMI, BIC_CPU_c1e, BIC_pct_idle, + BIC_LLC_RPS, + BIC_LLC_HIT, MAX_BIC }; @@ -305,6 +310,7 @@ static cpu_set_t bic_group_frequency; static cpu_set_t bic_group_hw_idle; static cpu_set_t bic_group_sw_idle; static cpu_set_t bic_group_idle; +static cpu_set_t bic_group_cache; static cpu_set_t bic_group_other; static cpu_set_t bic_group_disabled_by_default; static cpu_set_t bic_enabled; @@ -413,9 +419,14 @@ static void bic_groups_init(void) SET_BIC(BIC_pct_idle, &bic_group_sw_idle); BIC_INIT(&bic_group_idle); + CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle); SET_BIC(BIC_pct_idle, &bic_group_idle); + BIC_INIT(&bic_group_cache); + SET_BIC(BIC_LLC_RPS, &bic_group_cache); + SET_BIC(BIC_LLC_HIT, &bic_group_cache); + BIC_INIT(&bic_group_other); SET_BIC(BIC_IRQ, &bic_group_other); SET_BIC(BIC_NMI, &bic_group_other); @@ -466,12 +477,11 @@ static void bic_groups_init(void) #define PCL_10 14 /* PC10 */ #define PCLUNL 15 /* Unlimited */ -struct amperf_group_fd; - char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; +int *fd_llc_percpu; struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; @@ -482,11 +492,12 @@ unsigned int quiet; unsigned int shown; unsigned int sums_need_wide_columns; unsigned int rapl_joules; +unsigned int valid_rapl_msrs; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; unsigned int force_load; -unsigned int has_aperf; +unsigned int cpuid_has_aperf_mperf; unsigned int has_aperf_access; unsigned int has_epb; unsigned int has_turbo; @@ -552,8 +563,7 @@ static struct gfx_sysfs_info gfx_info[GFX_MAX]; int get_msr(int cpu, off_t offset, unsigned long long *msr); int add_counter(unsigned int msr_num, char *path, char *name, - unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format, int flags, int package_num); + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int package_num); /* Model specific support Start */ @@ -578,7 +588,7 @@ struct platform_features { bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ - int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ + int plat_rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */ bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ @@ -733,7 +743,7 @@ static const struct platform_features snb_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features snx_features = { @@ -745,7 +755,7 @@ static const struct platform_features snx_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, }; static const struct platform_features ivb_features = { @@ -758,7 +768,7 @@ static const struct platform_features ivb_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features ivx_features = { @@ -770,7 +780,7 @@ static const struct platform_features ivx_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_LIMIT1, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, }; static const struct platform_features hsw_features = { @@ -784,7 +794,7 @@ static const struct platform_features hsw_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features hsx_features = { @@ -798,7 +808,7 @@ static const struct platform_features hsx_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, .plr_msrs = PLR_CORE | PLR_RING, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -813,7 +823,7 @@ static const struct platform_features hswl_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features hswg_features = { @@ -827,7 +837,7 @@ static const struct platform_features hswg_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdw_features = { @@ -840,7 +850,7 @@ static const struct platform_features bdw_features = { .cst_limit = CST_LIMIT_HSW, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdwg_features = { @@ -853,7 +863,7 @@ static const struct platform_features bdwg_features = { .cst_limit = CST_LIMIT_HSW, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdx_features = { @@ -867,7 +877,7 @@ static const struct platform_features bdx_features = { .has_irtl_msrs = 1, .has_cst_auto_convension = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -884,7 +894,7 @@ static const struct platform_features skl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -901,7 +911,7 @@ static const struct platform_features cnl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -919,7 +929,7 @@ static const struct platform_features adl_features = { .has_ext_cst_msrs = cnl_features.has_ext_cst_msrs, .trl_msrs = cnl_features.trl_msrs, .tcc_offset_bits = cnl_features.tcc_offset_bits, - .rapl_msrs = cnl_features.rapl_msrs, + .plat_rapl_msrs = cnl_features.plat_rapl_msrs, .enable_tsc_tweak = cnl_features.enable_tsc_tweak, }; @@ -937,7 +947,7 @@ static const struct platform_features lnl_features = { .has_ext_cst_msrs = adl_features.has_ext_cst_msrs, .trl_msrs = adl_features.trl_msrs, .tcc_offset_bits = adl_features.tcc_offset_bits, - .rapl_msrs = adl_features.rapl_msrs, + .plat_rapl_msrs = adl_features.plat_rapl_msrs, .enable_tsc_tweak = adl_features.enable_tsc_tweak, }; @@ -952,7 +962,7 @@ static const struct platform_features skx_features = { .has_irtl_msrs = 1, .has_cst_auto_convension = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -968,7 +978,7 @@ static const struct platform_features icx_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, .has_fixed_rapl_unit = 1, }; @@ -985,7 +995,7 @@ static const struct platform_features spr_features = { .has_cst_prewake_bit = 1, .has_fixed_rapl_psys_unit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features dmr_features = { @@ -1000,7 +1010,7 @@ static const struct platform_features dmr_features = { .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit, .trl_msrs = spr_features.trl_msrs, .has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */ - .rapl_msrs = 0, /* DMR does not have RAPL MSRs */ + .plat_rapl_msrs = 0, /* DMR does not have RAPL MSRs */ .plr_msrs = 0, /* DMR does not have PLR MSRs */ .has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */ .has_config_tdp = 0, /* DMR does not have CTDP MSRs */ @@ -1019,7 +1029,7 @@ static const struct platform_features srf_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features grr_features = { @@ -1035,7 +1045,7 @@ static const struct platform_features grr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features slv_features = { @@ -1048,7 +1058,7 @@ static const struct platform_features slv_features = { .has_msr_c6_demotion_policy_config = 1, .has_msr_atom_pkg_c6_residency = 1, .trl_msrs = TRL_ATOM, - .rapl_msrs = RAPL_PKG | RAPL_CORE, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE, .has_rapl_divisor = 1, .rapl_quirk_tdp = 30, }; @@ -1061,7 +1071,7 @@ static const struct platform_features slvd_features = { .cst_limit = CST_LIMIT_SLV, .has_msr_atom_pkg_c6_residency = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE, .rapl_quirk_tdp = 30, }; @@ -1082,7 +1092,7 @@ static const struct platform_features gmt_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, }; static const struct platform_features gmtd_features = { @@ -1095,7 +1105,7 @@ static const struct platform_features gmtd_features = { .has_irtl_msrs = 1, .has_msr_core_c1_res = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, }; static const struct platform_features gmtp_features = { @@ -1107,7 +1117,7 @@ static const struct platform_features gmtp_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, }; static const struct platform_features tmt_features = { @@ -1118,7 +1128,7 @@ static const struct platform_features tmt_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, .enable_tsc_tweak = 1, }; @@ -1130,7 +1140,7 @@ static const struct platform_features tmtd_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL, }; static const struct platform_features knl_features = { @@ -1142,7 +1152,7 @@ static const struct platform_features knl_features = { .cst_limit = CST_LIMIT_KNL, .has_msr_knl_core_c6_residency = 1, .trl_msrs = TRL_KNL, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, .need_perf_multiplier = 1, }; @@ -1151,7 +1161,7 @@ static const struct platform_features default_features = { }; static const struct platform_features amd_features_with_rapl = { - .rapl_msrs = RAPL_AMD_F17H, + .plat_rapl_msrs = RAPL_AMD_F17H, .has_per_core_rapl = 1, .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ }; @@ -1210,6 +1220,9 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ARROWLAKE, &adl_features }, { INTEL_LUNARLAKE_M, &lnl_features }, { INTEL_PANTHERLAKE_L, &lnl_features }, + { INTEL_NOVALAKE, &lnl_features }, + { INTEL_NOVALAKE_L, &lnl_features }, + { INTEL_WILDCATLAKE_L, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, @@ -1294,8 +1307,7 @@ char *progname; #define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */ cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; -size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, - cpu_subset_size; +size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 @@ -1991,6 +2003,10 @@ void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) pmt_counter_resize_(pcounter, new_size); } +struct llc_stats { + unsigned long long references; + unsigned long long misses; +}; struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -2003,6 +2019,7 @@ struct thread_data { unsigned long long irq_count; unsigned long long nmi_count; unsigned int smi_count; + struct llc_stats llc; unsigned int cpu_id; unsigned int apic_id; unsigned int x2apic_id; @@ -2118,7 +2135,7 @@ off_t idx_to_offset(int idx) switch (idx) { case IDX_PKG_ENERGY: - if (platform->rapl_msrs & RAPL_AMD_F17H) + if (valid_rapl_msrs & RAPL_AMD_F17H) offset = MSR_PKG_ENERGY_STAT; else offset = MSR_PKG_ENERGY_STATUS; @@ -2184,19 +2201,19 @@ int idx_valid(int idx) { switch (idx) { case IDX_PKG_ENERGY: - return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); + return valid_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); case IDX_DRAM_ENERGY: - return platform->rapl_msrs & RAPL_DRAM; + return valid_rapl_msrs & RAPL_DRAM; case IDX_PP0_ENERGY: - return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; + return valid_rapl_msrs & RAPL_CORE_ENERGY_STATUS; case IDX_PP1_ENERGY: - return platform->rapl_msrs & RAPL_GFX; + return valid_rapl_msrs & RAPL_GFX; case IDX_PKG_PERF: - return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; + return valid_rapl_msrs & RAPL_PKG_PERF_STATUS; case IDX_DRAM_PERF: - return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; + return valid_rapl_msrs & RAPL_DRAM_PERF_STATUS; case IDX_PSYS_ENERGY: - return platform->rapl_msrs & RAPL_PSYS; + return valid_rapl_msrs & RAPL_PSYS; default: return 0; } @@ -2362,23 +2379,19 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk return retval; } -int is_cpu_first_thread_in_core(PER_THREAD_PARAMS) +int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c) { - UNUSED(p); - return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); } -int is_cpu_first_core_in_package(PER_THREAD_PARAMS) +int is_cpu_first_core_in_package(struct thread_data *t, struct pkg_data *p) { - UNUSED(c); - return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); } -int is_cpu_first_thread_in_package(PER_THREAD_PARAMS) +int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); + return is_cpu_first_thread_in_core(t, c) && is_cpu_first_core_in_package(t, p); } int cpu_migrate(int cpu) @@ -2400,20 +2413,11 @@ int get_msr_fd(int cpu) if (fd) return fd; -#if defined(ANDROID) - sprintf(pathname, "/dev/msr%d", cpu); -#else - sprintf(pathname, "/dev/cpu/%d/msr", cpu); -#endif + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) -#if defined(ANDROID) - err(-1, "%s open failed, try chown or chmod +r /dev/msr*, " - "or run with --no-msr, or run as root", pathname); -#else - err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " - "or run with --no-msr, or run as root", pathname); -#endif + err(-1, "%s open failed, try chown or chmod +r %s, " + "or run with --no-msr, or run as root", pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr"); fd_percpu[cpu] = fd; return fd; @@ -2432,6 +2436,13 @@ static void bic_disable_msr_access(void) free_sys_msr_counters(); } +static void bic_disable_perf_access(void) +{ + CLR_BIC(BIC_IPC, &bic_enabled); + CLR_BIC(BIC_LLC_RPS, &bic_enabled); + CLR_BIC(BIC_LLC_HIT, &bic_enabled); +} + static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { assert(!no_perf); @@ -2512,7 +2523,7 @@ int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai) { int ret; - if (!(platform->rapl_msrs & cai->feature_mask)) + if (!(valid_rapl_msrs & cai->feature_mask)) return -1; ret = add_msr_counter(cpu, cai->msr); @@ -2656,6 +2667,12 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) } else if (!strcmp(name_list, "idle")) { CPU_OR(ret_set, ret_set, &bic_group_idle); break; + } else if (!strcmp(name_list, "cache")) { + CPU_OR(ret_set, ret_set, &bic_group_cache); + break; + } else if (!strcmp(name_list, "llc")) { + CPU_OR(ret_set, ret_set, &bic_group_cache); + break; } else if (!strcmp(name_list, "swidle")) { CPU_OR(ret_set, ret_set, &bic_group_sw_idle); break; @@ -2677,8 +2694,7 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { - fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", - MAX_DEFERRED, name_list); + fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", MAX_DEFERRED, name_list); help(); exit(1); } @@ -2687,8 +2703,7 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) if (debug) fprintf(stderr, "deferred \"%s\"\n", name_list); if (deferred_skip_index >= MAX_DEFERRED) { - fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", - MAX_DEFERRED, name_list); + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", MAX_DEFERRED, name_list); help(); exit(1); } @@ -2702,6 +2717,47 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) } } +/* + * print_name() + * Print column header name for raw 64-bit counter in 16 columns (at least 8-char plus a tab) + * Otherwise, allow the name + tab to fit within 8-coumn tab-stop. + * In both cases, left justififed, just like other turbostat columns, + * to allow the column values to consume the tab. + * + * Yes, 32-bit counters can overflow 8-columns, and + * 64-bit counters can overflow 16-columns, but that is uncommon. + */ +static inline int print_name(int width, int *printed, char *delim, char *name, enum counter_type type, enum counter_format format) +{ + UNUSED(type); + + if (format == FORMAT_RAW && width >= 64) + return (sprintf(outp, "%s%-8s", (*printed++ ? delim : ""), name)); + else + return (sprintf(outp, "%s%s", (*printed++ ? delim : ""), name)); +} + +static inline int print_hex_value(int width, int *printed, char *delim, unsigned long long value) +{ + if (width <= 32) + return (sprintf(outp, "%s%08x", (*printed++ ? delim : ""), (unsigned int)value)); + else + return (sprintf(outp, "%s%016llx", (*printed++ ? delim : ""), value)); +} + +static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value) +{ + if (width <= 32) + return (sprintf(outp, "%s%d", (*printed++ ? delim : ""), (unsigned int)value)); + else + return (sprintf(outp, "%s%-8lld", (*printed++ ? delim : ""), value)); +} + +static inline int print_float_value(int *printed, char *delim, double value) +{ + return (sprintf(outp, "%s%0.2f", (*printed++ ? delim : ""), value)); +} + void print_header(char *delim) { struct msr_counter *mp; @@ -2757,50 +2813,28 @@ void print_header(char *delim) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); - for (mp = sys.tp; mp; mp = mp->next) { + if (DO_BIC(BIC_LLC_RPS)) + outp += sprintf(outp, "%sLLCkRPS", (printed++ ? delim : "")); - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); - } - } + if (DO_BIC(BIC_LLC_HIT)) + outp += sprintf(outp, "%sLLC%%hit", (printed++ ? delim : "")); - for (pp = sys.perf_tp; pp; pp = pp->next) { + for (mp = sys.tp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); - } - } + for (pp = sys.perf_tp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_tp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); - + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } @@ -2825,63 +2859,36 @@ void print_header(char *delim) if (DO_BIC(BIC_CORE_THROT_CNT)) outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); - if (platform->rapl_msrs && !rapl_joules) { + if (valid_rapl_msrs && !rapl_joules) { if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); - } else if (platform->rapl_msrs && rapl_joules) { + } else if (valid_rapl_msrs && rapl_joules) { if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); } - for (mp = sys.cp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", delim, mp->name); - else - outp += sprintf(outp, "%s%10.10s", delim, mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", delim, mp->name); - else - outp += sprintf(outp, "%s%s", delim, mp->name); - } - } + for (mp = sys.cp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - for (pp = sys.perf_cp; pp; pp = pp->next) { - - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); - } - } + for (pp = sys.perf_cp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_cp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } ppmt = ppmt->next; } - if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); @@ -2963,51 +2970,22 @@ void print_header(char *delim) if (DO_BIC(BIC_UNCORE_MHZ)) outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); - for (mp = sys.pp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", delim, mp->name); - else if (mp->width == 32) - outp += sprintf(outp, "%s%10.10s", delim, mp->name); - else - outp += sprintf(outp, "%s%7.7s", delim, mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", delim, mp->name); - else - outp += sprintf(outp, "%s%7.7s", delim, mp->name); - } - } - - for (pp = sys.perf_pp; pp; pp = pp->next) { + for (mp = sys.pp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); - } - } + for (pp = sys.perf_pp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_pp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); - + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } @@ -3022,6 +3000,25 @@ void print_header(char *delim) outp += sprintf(outp, "\n"); } +/* + * pct() + * + * If absolute value is < 1.1, return percentage + * otherwise, return nan + * + * return value is appropriate for printing percentages with %f + * while flagging some obvious erroneous values. + */ +double pct(double d) +{ + + double abs = fabs(d); + + if (abs < 1.10) + return (100.0 * d); + return nan(""); +} + int dump_counters(PER_THREAD_PARAMS) { int i; @@ -3047,14 +3044,16 @@ int dump_counters(PER_THREAD_PARAMS) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %d\n", t->smi_count); + outp += sprintf(outp, "LLC refs: %lld", t->llc.references); + outp += sprintf(outp, "LLC miss: %lld", t->llc.misses); + outp += sprintf(outp, "LLC Hit%%: %.2f", pct((t->llc.references - t->llc.misses) / t->llc.references)); + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - t->counter[i], mp->sp->path); + outp += sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, t->counter[i], mp->sp->path); } } - if (c && is_cpu_first_thread_in_core(t, c, p)) { + if (c && is_cpu_first_thread_in_core(t, c)) { outp += sprintf(outp, "core: %d\n", c->core_id); outp += sprintf(outp, "c3: %016llX\n", c->c3); outp += sprintf(outp, "c6: %016llX\n", c->c6); @@ -3069,14 +3068,12 @@ int dump_counters(PER_THREAD_PARAMS) outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - c->counter[i], mp->sp->path); + outp += sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, c->counter[i], mp->sp->path); } outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } - if (p && is_cpu_first_core_in_package(t, c, p)) { + if (p && is_cpu_first_core_in_package(t, p)) { outp += sprintf(outp, "package: %d\n", p->package_id); outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); @@ -3106,9 +3103,7 @@ int dump_counters(PER_THREAD_PARAMS) outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - p->counter[i], mp->sp->path); + outp += sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, p->counter[i], mp->sp->path); } } @@ -3134,6 +3129,26 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir return scaled; } +void get_perf_llc_stats(int cpu, struct llc_stats *llc) +{ + struct read_format { + unsigned long long num_read; + struct llc_stats llc; + } r; + const ssize_t expected_read_size = sizeof(r); + ssize_t actual_read_size; + + actual_read_size = read(fd_llc_percpu[cpu], &r, expected_read_size); + + if (actual_read_size == -1) + err(-1, "%s(cpu%d,) %d,,%ld\n", __func__, cpu, fd_llc_percpu[cpu], expected_read_size); + + llc->references = r.llc.references; + llc->misses = r.llc.misses; + if (actual_read_size != expected_read_size) + warn("%s: failed to read perf_data (req %zu act %zu)", __func__, expected_read_size, actual_read_size); +} + /* * column formatting convention & formats */ @@ -3143,7 +3158,8 @@ int format_counters(PER_THREAD_PARAMS) struct platform_counters *pplat_cnt = NULL; double interval_float, tsc; - char *fmt8; + char *fmt8 = "%s%.2f"; + int i; struct msr_counter *mp; struct perf_counter_info *pp; @@ -3157,11 +3173,11 @@ int format_counters(PER_THREAD_PARAMS) } /* if showing only 1st thread in core and this isn't one, bail out */ - if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) + if (show_core_only && !is_cpu_first_thread_in_core(t, c)) return 0; /* if showing only 1st thread in pkg and this isn't one, bail out */ - if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) + if (show_pkg_only && !is_cpu_first_core_in_package(t, p)) return 0; /*if not summary line and --cpu is used */ @@ -3223,8 +3239,7 @@ int format_counters(PER_THREAD_PARAMS) } if (DO_BIC(BIC_Node)) { if (t) - outp += sprintf(outp, "%s%d", - (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } @@ -3246,15 +3261,13 @@ int format_counters(PER_THREAD_PARAMS) outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->mperf / tsc)); if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) - outp += - sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); else - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), - tsc / units * t->aperf / t->mperf / interval_float); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), tsc / units * t->aperf / t->mperf / interval_float); } if (DO_BIC(BIC_TSC_MHz)) @@ -3283,96 +3296,81 @@ int format_counters(PER_THREAD_PARAMS) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); - /* Added counters */ + /* LLC Stats */ + if (DO_BIC(BIC_LLC_RPS) || DO_BIC(BIC_LLC_HIT)) { + if (DO_BIC(BIC_LLC_RPS)) + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->llc.references / interval_float / 1000); + + if (DO_BIC(BIC_LLC_HIT)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), pct((t->llc.references - t->llc.misses) / t->llc.references)); + } + + /* Added Thread Counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, t->counter[i]); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, t->counter[i]); + else if (mp->format == FORMAT_PERCENT) { if (mp->type == COUNTER_USEC) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - t->counter[i] / interval_float / 10000); + outp += print_float_value(&printed, delim, t->counter[i] / interval_float / 10000); else - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); + outp += print_float_value(&printed, delim, pct(t->counter[i] / tsc)); } } - /* Added perf counters */ + /* Added perf Thread Counters */ for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)t->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, t->perf_counter[i]); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, t->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) { if (pp->type == COUNTER_USEC) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - t->perf_counter[i] / interval_float / 10000); + outp += print_float_value(&printed, delim, t->perf_counter[i] / interval_float / 10000); else - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); + outp += print_float_value(&printed, delim, pct(t->perf_counter[i] / tsc)); } } + /* Added PMT Thread Counters */ for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = t->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)t->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, t->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; + value_converted = pct(value_raw / crystal_hz / interval_float); outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); } } /* C1 */ if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->c1 / tsc)); /* print per-core data only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) goto done; if (DO_BIC(BIC_CPU_c3)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c3 / tsc)); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c6 / tsc)); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c7 / tsc)); /* Mod%c6 */ if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->mc6_us / tsc)); if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); @@ -3381,77 +3379,53 @@ int format_counters(PER_THREAD_PARAMS) if (DO_BIC(BIC_CORE_THROT_CNT)) outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); + /* Added Core Counters */ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); - } + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, c->counter[i]); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, c->counter[i]); + else if (mp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(c->counter[i] / tsc)); } + /* Added perf Core counters */ for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)c->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); - } + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, c->perf_counter[i]); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, c->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(c->perf_counter[i] / tsc)); } + /* Added PMT Core counters */ for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = c->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)c->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, c->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / crystal_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); } } - fmt8 = "%s%.2f"; - if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); /* print per-package data only for 1st core in package */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) goto done; /* PkgTmp */ @@ -3463,8 +3437,7 @@ int format_counters(PER_THREAD_PARAMS) if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - p->gfx_rc6_ms / 10.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->gfx_rc6_ms / 10.0 / interval_float); } } @@ -3481,8 +3454,7 @@ int format_counters(PER_THREAD_PARAMS) if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - p->sam_mc6_ms / 10.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->sam_mc6_ms / 10.0 / interval_float); } } @@ -3496,150 +3468,112 @@ int format_counters(PER_THREAD_PARAMS) /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100 * p->pkg_wtd_core_c0 / tsc); /* can exceed 100% */ if (DO_BIC(BIC_Any_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_core_c0 / tsc)); if (DO_BIC(BIC_GFX_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_gfxe_c0 / tsc)); if (DO_BIC(BIC_CPUGFX)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_both_core_gfxe_c0 / tsc)); if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc2 / tsc)); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc3 / tsc)); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc6 / tsc)); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc7 / tsc)); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc8 / tsc)); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc9 / tsc)); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc10 / tsc)); if (DO_BIC(BIC_Diec6)) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->die_c6 / crystal_hz / interval_float)); if (DO_BIC(BIC_CPU_LPI)) { if (p->cpu_lpi >= 0) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - 100.0 * p->cpu_lpi / 1000000.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->cpu_lpi / 1000000.0 / interval_float)); else outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); } if (DO_BIC(BIC_SYS_LPI)) { if (p->sys_lpi >= 0) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - 100.0 * p->sys_lpi / 1000000.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->sys_lpi / 1000000.0 / interval_float)); else outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); } if (DO_BIC(BIC_PkgWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_GFXWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_RAMWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_PKG__)) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); + sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_RAM__)) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); + sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); /* UncMHz */ if (DO_BIC(BIC_UNCORE_MHZ)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); + /* Added Package Counters */ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); - } else if (mp->type == COUNTER_K2M) + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, p->counter[i]); + else if (mp->type == COUNTER_K2M) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, p->counter[i]); + else if (mp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(p->counter[i] / tsc)); } + /* Added perf Package Counters */ for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)p->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); - } else if (pp->type == COUNTER_K2M) { - outp += - sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); - } + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, p->perf_counter[i]); + else if (pp->type == COUNTER_K2M) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, p->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(p->perf_counter[i] / tsc)); } + /* Added PMT Package Counters */ for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = p->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)p->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, p->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / crystal_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); } } @@ -3754,11 +3688,10 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; - old->rapl_dram_perf_status.raw_value = - new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; + old->rapl_dram_perf_status.raw_value = new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) + if (mp->format == FORMAT_RAW) old->counter[i] = new->counter[i]; else if (mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; @@ -3862,8 +3795,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d /* check for TSC < 1 Mcycles over interval */ if (old->tsc < (1000 * 1000)) errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" - "You can disable all c-states by booting with \"idle=poll\"\n" - "or just the deep ones with \"processor.max_cstate=1\""); + "You can disable all c-states by booting with \"idle=poll\"\n" "or just the deep ones with \"processor.max_cstate=1\""); old->c1 = new->c1 - old->c1; @@ -3892,8 +3824,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d old->c1 = 0; else { /* normal case, derive c1 */ - old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - - core_delta->c6 - core_delta->c7; + old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7; } } @@ -3915,6 +3846,12 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d if (DO_BIC(BIC_SMI)) old->smi_count = new->smi_count - old->smi_count; + if (DO_BIC(BIC_LLC_RPS)) + old->llc.references = new->llc.references - old->llc.references; + + if (DO_BIC(BIC_LLC_HIT)) + old->llc.misses = new->llc.misses - old->llc.misses; + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; @@ -3939,20 +3876,19 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d return 0; } -int delta_cpu(struct thread_data *t, struct core_data *c, - struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) +int delta_cpu(struct thread_data *t, struct core_data *c, struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) { int retval = 0; /* calculate core delta only for 1st thread in core */ - if (is_cpu_first_thread_in_core(t, c, p)) + if (is_cpu_first_thread_in_core(t, c)) delta_core(c, c2); /* always calculate thread delta */ retval = delta_thread(t, t2, c2); /* c2 is core delta */ /* calculate package delta only for 1st core in package */ - if (is_cpu_first_core_in_package(t, c, p)) + if (is_cpu_first_core_in_package(t, p)) retval |= delta_package(p, p2); return retval; @@ -3993,6 +3929,9 @@ void clear_counters(PER_THREAD_PARAMS) t->nmi_count = 0; t->smi_count = 0; + t->llc.references = 0; + t->llc.misses = 0; + c->c3 = 0; c->c6 = 0; c->c7 = 0; @@ -4001,6 +3940,9 @@ void clear_counters(PER_THREAD_PARAMS) rapl_counter_clear(&c->core_energy); c->core_throt_cnt = 0; + t->llc.references = 0; + t->llc.misses = 0; + p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; p->pkg_any_gfxe_c0 = 0; @@ -4098,6 +4040,9 @@ int sum_counters(PER_THREAD_PARAMS) average.threads.nmi_count += t->nmi_count; average.threads.smi_count += t->smi_count; + average.threads.llc.references += t->llc.references; + average.threads.llc.misses += t->llc.misses; + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; @@ -4115,7 +4060,7 @@ int sum_counters(PER_THREAD_PARAMS) } /* sum per-core values only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) return 0; average.cores.c3 += c->c3; @@ -4145,7 +4090,7 @@ int sum_counters(PER_THREAD_PARAMS) } /* sum per-pkg values only for 1st core in pkg */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) return 0; if (DO_BIC(BIC_Totl_c0)) @@ -4411,8 +4356,7 @@ unsigned long long get_legacy_uncore_mhz(int package) */ for (die = 0; die <= topo.max_die_id; ++die) { - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", - package, die); + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, die); if (access(path, R_OK) == 0) return (snapshot_sysfs_counter(path) / 1000); @@ -4523,11 +4467,6 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt) return 0; } -struct amperf_group_fd { - int aperf; /* Also the group descriptor */ - int mperf; -}; - static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) { int fdmt; @@ -4727,8 +4666,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); if (actual_read_size != expected_read_size) - err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, - actual_read_size); + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size); } for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { @@ -4966,8 +4904,7 @@ int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); if (actual_read_size != expected_read_size) - err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, - actual_read_size); + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size); } for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { @@ -5121,6 +5058,9 @@ int get_counters(PER_THREAD_PARAMS) get_smi_aperf_mperf(cpu, t); + if (DO_BIC(BIC_LLC_RPS) || DO_BIC(BIC_LLC_HIT)) + get_perf_llc_stats(cpu, &t->llc); + if (DO_BIC(BIC_IPC)) if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) return -4; @@ -5144,7 +5084,7 @@ int get_counters(PER_THREAD_PARAMS) t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); /* collect core counters only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) goto done; if (platform->has_per_core_rapl) { @@ -5188,7 +5128,7 @@ int get_counters(PER_THREAD_PARAMS) c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); /* collect package counters only for 1st core in package */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) goto done; if (DO_BIC(BIC_Totl_c0)) { @@ -5277,48 +5217,39 @@ char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2", "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" }; -int nhm_pkg_cstate_limits[16] = - { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int nhm_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int snb_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int snb_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int hsw_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int hsw_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int slv_pkg_cstate_limits[16] = - { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int slv_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7 }; -int amt_pkg_cstate_limits[16] = - { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int amt_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int phi_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int phi_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int glm_pkg_cstate_limits[16] = - { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int glm_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int skx_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int skx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int icx_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int icx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; @@ -5393,8 +5324,7 @@ static void dump_power_ctl(void) return; get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); - fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", - base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); + fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ if (platform->has_cst_prewake_bit) @@ -5487,8 +5417,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset) ratio = (msr >> shift) & 0xFF; group_size = (core_counts >> shift) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", - ratio, bclk, ratio * bclk, group_size); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio, bclk, ratio * bclk, group_size); } return; @@ -5586,9 +5515,7 @@ static void dump_knl_turbo_ratio_limits(void) for (i = buckets_no - 1; i >= 0; i--) if (i > 0 ? ratio[i] != ratio[i - 1] : 1) - fprintf(outf, - "%d * %.1f = %.1f MHz max turbo %d active cores\n", - ratio[i], bclk, ratio[i] * bclk, cores[i]); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]); } static void dump_cst_cfg(void) @@ -5673,43 +5600,37 @@ void print_irtl(void) if (platform->supported_cstates & PC3) { get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC6) { get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC7) { get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC8) { get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC9) { get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC10) { get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } } @@ -5743,6 +5664,20 @@ void free_fd_instr_count_percpu(void) fd_instr_count_percpu = NULL; } +void free_fd_llc_percpu(void) +{ + if (!fd_llc_percpu) + return; + + for (int i = 0; i < topo.max_cpu_num + 1; ++i) { + if (fd_llc_percpu[i] != 0) + close(fd_llc_percpu[i]); + } + + free(fd_llc_percpu); + fd_llc_percpu = NULL; +} + void free_fd_cstate(void) { if (!ccstate_counter_info) @@ -5867,6 +5802,7 @@ void free_all_buffers(void) free_fd_percpu(); free_fd_instr_count_percpu(); + free_fd_llc_percpu(); free_fd_msr(); free_fd_rapl_percpu(); free_fd_cstate(); @@ -6213,6 +6149,7 @@ void linux_perf_init(void); void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); +void perf_llc_init(void); void added_perf_counters_init(void); void pmt_init(void); @@ -6224,10 +6161,10 @@ void re_initialize(void) msr_perf_init(); rapl_perf_init(); cstate_perf_init(); + perf_llc_init(); added_perf_counters_init(); pmt_init(); - fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, - topo.allowed_cpus); + fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } void set_max_cpu_num(void) @@ -6673,6 +6610,7 @@ release_timer: timer_delete(timerid); release_msr: free(per_cpu_msr_sum); + per_cpu_msr_sum = NULL; } /* @@ -6797,21 +6735,43 @@ restart: } } -void check_dev_msr() +int probe_dev_msr(void) { struct stat sb; char pathname[32]; - if (no_msr) - return; -#if defined(ANDROID) sprintf(pathname, "/dev/msr%d", base_cpu); -#else + return !stat(pathname, &sb); +} + +int probe_dev_cpu_msr(void) +{ + struct stat sb; + char pathname[32]; + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); -#endif - if (stat(pathname, &sb)) - if (system("/sbin/modprobe msr > /dev/null 2>&1")) - no_msr = 1; + return !stat(pathname, &sb); +} + +int probe_msr_driver(void) +{ + if (probe_dev_msr()) { + use_android_msr_path = 1; + return 1; + } + return probe_dev_cpu_msr(); +} + +void check_msr_driver(void) +{ + if (probe_msr_driver()) + return; + + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + no_msr = 1; + + if (!probe_msr_driver()) + no_msr = 1; } /* @@ -6866,11 +6826,7 @@ void check_msr_permission(void) failed += check_for_cap_sys_rawio(); /* test file permissions */ -#if defined(ANDROID) - sprintf(pathname, "/dev/msr%d", base_cpu); -#else - sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); -#endif + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", base_cpu); if (euidaccess(pathname, R_OK)) { failed++; } @@ -6999,8 +6955,7 @@ static void probe_intel_uncore_frequency_legacy(void) int k, l; char path_base[128]; - sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, - j); + sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, j); sprintf(path, "%s/current_freq_khz", path_base); if (access(path, R_OK)) @@ -7083,8 +7038,7 @@ static void probe_intel_uncore_frequency_cluster(void) */ if BIC_IS_ENABLED (BIC_UNCORE_MHZ) - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, - package_id); + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); if (quiet) continue; @@ -7093,8 +7047,7 @@ static void probe_intel_uncore_frequency_cluster(void) k = read_sysfs_int(path); sprintf(path, "%s/max_freq_khz", path_base); l = read_sysfs_int(path); - fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, - cluster_id, k / 1000, l / 1000); + fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, cluster_id, k / 1000, l / 1000); sprintf(path, "%s/initial_min_freq_khz", path_base); k = read_sysfs_int(path); @@ -7156,21 +7109,17 @@ static void probe_graphics(void) else goto next; - set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", - gt0_is_gt ? GFX_rc6 : SAM_mc6); + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6); set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); - set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", - gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); - set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", - gt0_is_gt ? SAM_mc6 : GFX_rc6); + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6); set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); - set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", - gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz); + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz); goto end; } @@ -7425,8 +7374,7 @@ int print_hwp(PER_THREAD_PARAMS) "(high %d guar %d eff %d low %d)\n", cpu, msr, (unsigned int)HWP_HIGHEST_PERF(msr), - (unsigned int)HWP_GUARANTEED_PERF(msr), - (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); + (unsigned int)HWP_GUARANTEED_PERF(msr), (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) return 0; @@ -7437,8 +7385,7 @@ int print_hwp(PER_THREAD_PARAMS) (unsigned int)(((msr) >> 0) & 0xff), (unsigned int)(((msr) >> 8) & 0xff), (unsigned int)(((msr) >> 16) & 0xff), - (unsigned int)(((msr) >> 24) & 0xff), - (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); + (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); if (has_hwp_pkg) { if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) @@ -7449,23 +7396,20 @@ int print_hwp(PER_THREAD_PARAMS) cpu, msr, (unsigned int)(((msr) >> 0) & 0xff), (unsigned int)(((msr) >> 8) & 0xff), - (unsigned int)(((msr) >> 16) & 0xff), - (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); + (unsigned int)(((msr) >> 16) & 0xff), (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); } if (has_hwp_notify) { if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) return 0; fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " - "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", - cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); + "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); } if (get_msr(cpu, MSR_HWP_STATUS, &msr)) return 0; fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " - "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", - cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); return 0; } @@ -7510,8 +7454,7 @@ int print_perf_limit(PER_THREAD_PARAMS) (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 5) ? "Auto-HWP, " : "", (msr & 1 << 4) ? "Graphics, " : "", - (msr & 1 << 2) ? "bit2, " : "", - (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); + (msr & 1 << 2) ? "bit2, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", (msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 30) ? "bit30, " : "", @@ -7524,8 +7467,7 @@ int print_perf_limit(PER_THREAD_PARAMS) (msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 21) ? "Auto-HWP, " : "", (msr & 1 << 20) ? "Graphics, " : "", - (msr & 1 << 18) ? "bit18, " : "", - (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); + (msr & 1 << 18) ? "bit18, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); } if (platform->plr_msrs & PLR_GFX) { @@ -7537,16 +7479,14 @@ int print_perf_limit(PER_THREAD_PARAMS) (msr & 1 << 4) ? "Graphics, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 9) ? "GFXPwr, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); + (msr & 1 << 9) ? "GFXPwr, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 20) ? "Graphics, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 25) ? "GFXPwr, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); + (msr & 1 << 25) ? "GFXPwr, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); } if (platform->plr_msrs & PLR_RING) { get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); @@ -7555,14 +7495,12 @@ int print_perf_limit(PER_THREAD_PARAMS) (msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", - (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); + (msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", - (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); + (msr & 1 << 24) ? "Amps, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); } return 0; } @@ -7582,7 +7520,7 @@ double get_tdp_intel(void) { unsigned long long msr; - if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) + if (valid_rapl_msrs & RAPL_PKG_POWER_INFO) if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; return get_quirk_tdp(); @@ -7613,12 +7551,12 @@ void rapl_probe_intel(void) CLR_BIC(BIC_GFX_J, &bic_enabled); } - if (!platform->rapl_msrs || no_msr) + if (!valid_rapl_msrs || no_msr) return; - if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) + if (!(valid_rapl_msrs & RAPL_PKG_PERF_STATUS)) CLR_BIC(BIC_PKG__, &bic_enabled); - if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) + if (!(valid_rapl_msrs & RAPL_DRAM_PERF_STATUS)) CLR_BIC(BIC_RAM__, &bic_enabled); /* units on package 0, verify later other packages match */ @@ -7667,7 +7605,7 @@ void rapl_probe_amd(void) CLR_BIC(BIC_Cor_J, &bic_enabled); } - if (!platform->rapl_msrs || no_msr) + if (!valid_rapl_msrs || no_msr) return; if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) @@ -7690,8 +7628,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label) cpu, label, ((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 0) & 0x7FFF) * rapl_power_units, - (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, - (((msr >> 16) & 1) ? "EN" : "DIS")); + (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, (((msr >> 16) & 1) ? "EN" : "DIS")); return; } @@ -7857,7 +7794,7 @@ int print_rapl(PER_THREAD_PARAMS) UNUSED(c); UNUSED(p); - if (!platform->rapl_msrs) + if (!valid_rapl_msrs) return 0; /* RAPL counters are per package, so print only for 1st thread/package */ @@ -7870,7 +7807,7 @@ int print_rapl(PER_THREAD_PARAMS) return -1; } - if (platform->rapl_msrs & RAPL_AMD_F17H) { + if (valid_rapl_msrs & RAPL_AMD_F17H) { msr_name = "MSR_RAPL_PWR_UNIT"; if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) return -1; @@ -7883,7 +7820,7 @@ int print_rapl(PER_THREAD_PARAMS) fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); - if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { + if (valid_rapl_msrs & RAPL_PKG_POWER_INFO) { if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) return -5; @@ -7892,25 +7829,22 @@ int print_rapl(PER_THREAD_PARAMS) cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (platform->rapl_msrs & RAPL_PKG) { + if (valid_rapl_msrs & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 63) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 63) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", cpu, ((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 32) & 0x7FFF) * rapl_power_units, - (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, - ((msr >> 48) & 1) ? "EN" : "DIS"); + (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, ((msr >> 48) & 1) ? "EN" : "DIS"); if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) return -9; @@ -7920,7 +7854,7 @@ int print_rapl(PER_THREAD_PARAMS) cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); } - if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { + if (valid_rapl_msrs & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; @@ -7928,31 +7862,28 @@ int print_rapl(PER_THREAD_PARAMS) cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (platform->rapl_msrs & RAPL_DRAM) { + if (valid_rapl_msrs & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); } - if (platform->rapl_msrs & RAPL_CORE_POLICY) { + if (valid_rapl_msrs & RAPL_CORE_POLICY) { if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } - if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { + if (valid_rapl_msrs & RAPL_CORE_POWER_LIMIT) { if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "Cores Limit"); } - if (platform->rapl_msrs & RAPL_GFX) { + if (valid_rapl_msrs & RAPL_GFX) { if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; @@ -7960,20 +7891,58 @@ int print_rapl(PER_THREAD_PARAMS) if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "GFX Limit"); } return 0; } /* + * probe_rapl_msrs + * + * initialize global valid_rapl_msrs to platform->plat_rapl_msrs + * only if PKG_ENERGY counter is enumerated and reads non-zero + */ +void probe_rapl_msrs(void) +{ + int ret; + off_t offset; + unsigned long long msr_value; + + if (no_msr) + return; + + if ((platform->plat_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H)) == 0) + return; + + offset = idx_to_offset(IDX_PKG_ENERGY); + if (offset < 0) + return; + + ret = get_msr(base_cpu, offset, &msr_value); + if (ret) { + if (debug) + fprintf(outf, "Can not read RAPL_PKG_ENERGY MSR(0x%llx)\n", (unsigned long long)offset); + return; + } + if (msr_value == 0) { + if (debug) + fprintf(outf, "RAPL_PKG_ENERGY MSR(0x%llx) == ZERO: disabling all RAPL MSRs\n", (unsigned long long)offset); + return; + } + + valid_rapl_msrs = platform->plat_rapl_msrs; /* success */ +} + +/* * probe_rapl() * * sets rapl_power_units, rapl_energy_units, rapl_time_units */ void probe_rapl(void) { + probe_rapl_msrs(); + if (genuine_intel) rapl_probe_intel(); if (authentic_amd || hygon_genuine) @@ -7984,7 +7953,7 @@ void probe_rapl(void) print_rapl_sysfs(); - if (!platform->rapl_msrs || no_msr) + if (!valid_rapl_msrs || no_msr) return; for_all_cpus(print_rapl, ODD_COUNTERS); @@ -8088,7 +8057,7 @@ int print_thermal(PER_THREAD_PARAMS) cpu = t->cpu_id; /* DTS is per-core, no need to print for each thread */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) return 0; if (cpu_migrate(cpu)) { @@ -8096,7 +8065,7 @@ int print_thermal(PER_THREAD_PARAMS) return -1; } - if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { + if (do_ptm && is_cpu_first_core_in_package(t, p)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return 0; @@ -8108,8 +8077,7 @@ int print_thermal(PER_THREAD_PARAMS) dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2); } if (do_dts && debug) { @@ -8120,16 +8088,14 @@ int print_thermal(PER_THREAD_PARAMS) dts = (msr >> 16) & 0x7F; resolution = (msr >> 27) & 0xF; - fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", - cpu, msr, tj_max - dts, resolution); + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", cpu, msr, tj_max - dts, resolution); if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) return 0; dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2); } return 0; @@ -8203,8 +8169,7 @@ void decode_misc_enable_msr(void) msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", - msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", - msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); + msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } void decode_misc_feature_control(void) @@ -8243,8 +8208,7 @@ void decode_misc_pwr_mgmt_msr(void) if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", - base_cpu, msr, - msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); + base_cpu, msr, msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); } /* @@ -8297,30 +8261,26 @@ void print_dev_latency(void) close(fd); } -static int has_instr_count_access(void) +static int has_perf_instr_count_access(void) { int fd; - int has_access; if (no_perf) return 0; fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); - has_access = fd != -1; - if (fd != -1) close(fd); - if (!has_access) + if (fd == -1) warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "instructions retired perf counter", "--no-perf"); + "\tRun as root to enable them or use %s to disable the access explicitly", "perf instructions retired counter", + "'--hide IPC' or '--no-perf'"); - return has_access; + return (fd != -1); } -int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, - double *scale_, enum rapl_unit *unit_) +int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_) { int ret = -1; @@ -8370,11 +8330,16 @@ void linux_perf_init(void) if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) return; - if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { + if (BIC_IS_ENABLED(BIC_IPC) && cpuid_has_aperf_mperf) { fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); if (fd_instr_count_percpu == NULL) err(-1, "calloc fd_instr_count_percpu"); } + if (BIC_IS_ENABLED(BIC_LLC_RPS)) { + fd_llc_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (fd_llc_percpu == NULL) + err(-1, "calloc fd_llc_percpu"); + } } void rapl_perf_init(void) @@ -8485,7 +8450,7 @@ void rapl_perf_init(void) /* Assumes msr_counter_info is populated */ static int has_amperf_access(void) { - return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && + return cpuid_has_aperf_mperf && msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; } @@ -8708,8 +8673,7 @@ void cstate_perf_init_(bool soft_c1) cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; /* User MSR for this counter */ - } else if (pkg_cstate_limit >= cai->pkg_cstate_limit - && add_msr_counter(cpu, cai->msr) >= 0) { + } else if (pkg_cstate_limit >= cai->pkg_cstate_limit && add_msr_counter(cpu, cai->msr) >= 0) { cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } @@ -8827,8 +8791,7 @@ void process_cpuid() hygon_genuine = 1; if (!quiet) - fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", - (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); + fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; @@ -8857,8 +8820,7 @@ void process_cpuid() __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); if (!quiet) { - fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", - family, model, stepping, family, model, stepping); + fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", family, model, stepping, family, model, stepping); if (ucode_patch_valid) fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); fputc('\n', outf); @@ -8872,8 +8834,7 @@ void process_cpuid() ecx_flags & (1 << 8) ? "TM2" : "-", edx_flags & (1 << 4) ? "TSC" : "-", edx_flags & (1 << 5) ? "MSR" : "-", - edx_flags & (1 << 22) ? "ACPI-TM" : "-", - edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); + edx_flags & (1 << 22) ? "ACPI-TM" : "-", edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); } probe_platform_features(family, model); @@ -8897,7 +8858,7 @@ void process_cpuid() */ __cpuid(0x6, eax, ebx, ecx, edx); - has_aperf = ecx & (1 << 0); + cpuid_has_aperf_mperf = ecx & (1 << 0); do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); @@ -8915,14 +8876,13 @@ void process_cpuid() if (!quiet) fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", - has_aperf ? "" : "No-", + cpuid_has_aperf_mperf ? "" : "No-", has_turbo ? "" : "No-", do_dts ? "" : "No-", do_ptm ? "" : "No-", has_hwp ? "" : "No-", has_hwp_notify ? "" : "No-", - has_hwp_activity_window ? "" : "No-", - has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); + has_hwp_activity_window ? "" : "No-", has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); if (!quiet) decode_misc_enable_msr(); @@ -8956,8 +8916,7 @@ void process_cpuid() if (ebx_tsc != 0) { if (!quiet && (ebx != 0)) - fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", - eax_crystal, ebx_tsc, crystal_hz); + fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); if (crystal_hz == 0) crystal_hz = platform->crystal_freq; @@ -8989,11 +8948,10 @@ void process_cpuid() tsc_tweak = base_hz / tsc_hz; if (!quiet) - fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", - base_mhz, max_mhz, bus_mhz); + fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); } - if (has_aperf) + if (cpuid_has_aperf_mperf) aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1; BIC_PRESENT(BIC_IRQ); @@ -9045,6 +9003,62 @@ void probe_pm_features(void) decode_misc_feature_control(); } +/* perf_llc_probe + * + * return 1 on success, else 0 + */ +int has_perf_llc_access(void) +{ + int fd; + + if (no_perf) + return 0; + + fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP); + if (fd != -1) + close(fd); + + if (fd == -1) + warnx("Failed to access %s. Some of the counters may not be available\n" + "\tRun as root to enable them or use %s to disable the access explicitly", "perf LLC counters", "'--hide LLC' or '--no-perf'"); + + return (fd != -1); +} + +void perf_llc_init(void) +{ + int cpu; + int retval; + + if (no_perf) + return; + if (!(BIC_IS_ENABLED(BIC_LLC_RPS) && BIC_IS_ENABLED(BIC_LLC_HIT))) + return; + + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { + + if (cpu_is_not_allowed(cpu)) + continue; + + assert(fd_llc_percpu != 0); + fd_llc_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP); + if (fd_llc_percpu[cpu] == -1) { + warnx("%s: perf REFS: failed to open counter on cpu%d", __func__, cpu); + free_fd_llc_percpu(); + return; + } + assert(fd_llc_percpu != 0); + retval = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, fd_llc_percpu[cpu], PERF_FORMAT_GROUP); + if (retval == -1) { + warnx("%s: perf MISS: failed to open counter on cpu%d", __func__, cpu); + free_fd_llc_percpu(); + return; + } + } + BIC_PRESENT(BIC_LLC_RPS); + BIC_PRESENT(BIC_LLC_HIT); +} + /* * in /dev/cpu/ return success for names that are numbers * ie. filter out ".", "..", "microcode". @@ -9351,6 +9365,7 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, t->cpu_id = cpu_id; if (!cpu_is_not_allowed(cpu_id)) { + if (c->base_cpu < 0) c->base_cpu = t->cpu_id; if (pkg_base[pkg_id].base_cpu < 0) @@ -9456,7 +9471,7 @@ bool has_added_counters(void) void check_msr_access(void) { - check_dev_msr(); + check_msr_driver(); check_msr_permission(); if (no_msr) @@ -9465,8 +9480,16 @@ void check_msr_access(void) void check_perf_access(void) { - if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) - CLR_BIC(BIC_IPC, &bic_enabled); + if (BIC_IS_ENABLED(BIC_IPC)) + if (!has_perf_instr_count_access()) + no_perf = 1; + + if (BIC_IS_ENABLED(BIC_LLC_RPS) || BIC_IS_ENABLED(BIC_LLC_HIT)) + if (!has_perf_llc_access()) + no_perf = 1; + + if (no_perf) + bic_disable_perf_access(); } bool perf_has_hybrid_devices(void) @@ -9589,8 +9612,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) perf_config = read_perf_config(perf_device, pinfo->event); if (perf_config == (unsigned int)-1) { - warnx("%s: perf/%s/%s: failed to read %s", - __func__, perf_device, pinfo->event, "config"); + warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "config"); continue; } @@ -9601,8 +9623,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); if (fd_perf == -1) { - warnx("%s: perf/%s/%s: failed to open counter on cpu%d", - __func__, perf_device, pinfo->event, cpu); + warnx("%s: perf/%s/%s: failed to open counter on cpu%d", __func__, perf_device, pinfo->event, cpu); continue; } @@ -9611,8 +9632,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) pinfo->scale = perf_scale; if (debug) - fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", - perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); } pinfo = pinfo->next; @@ -9926,8 +9946,7 @@ int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum } if (conflict) { - fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", - __func__, name); + fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", __func__, name); exit(1); } @@ -9970,8 +9989,7 @@ void pmt_init(void) * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK. */ pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK, - PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, - FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); + PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); /* * Rather complex logic for each time we go to the next loop iteration, @@ -10021,6 +10039,7 @@ void turbostat_init() linux_perf_init(); rapl_perf_init(); cstate_perf_init(); + perf_llc_init(); added_perf_counters_init(); pmt_init(); @@ -10126,7 +10145,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.09.09 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2025.12.02 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -10166,8 +10185,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) } int add_counter(unsigned int msr_num, char *path, char *name, - unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format, int flags, int id) + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int id) { struct msr_counter *msrp; @@ -10276,9 +10294,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, struct perf_counter_info *make_perf_counter_info(const char *perf_device, const char *perf_event, const char *name, - unsigned int width, - enum counter_scope scope, - enum counter_type type, enum counter_format format) + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format) { struct perf_counter_info *pinfo; @@ -10353,8 +10369,7 @@ int add_perf_counter(const char *perf_device, const char *perf_event, const char // FIXME: we might not have debug here yet if (debug) - fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", - __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); + fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); return 0; } @@ -10523,8 +10538,7 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne pmt_diriter_init(&pmt_iter); - for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; - dirname = pmt_diriter_next(&pmt_iter)) { + for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; dirname = pmt_diriter_next(&pmt_iter)) { fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY); if (fd_telem_dir == -1) @@ -10536,8 +10550,7 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne } if (fstat(fd_telem_dir, &stat) == -1) { - fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, - dirname->d_name, strerror(errno)); + fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, dirname->d_name, strerror(errno)); continue; } @@ -10633,8 +10646,7 @@ void parse_add_command_pmt(char *add_command) } if (!has_scope) { - printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", - __func__); + printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", __func__); exit(1); } @@ -10710,8 +10722,7 @@ next: } if (!has_format) { - fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n", - __func__, format_name); + fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n", __func__, format_name); exit(1); } } @@ -10878,7 +10889,7 @@ void probe_cpuidle_residency(void) if (is_deferred_skip(name_buf)) continue; - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); + add_counter(0, path, name_buf, 32, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); if (state > max_state) max_state = state; |
