diff options
Diffstat (limited to 'tools/power')
30 files changed, 447 insertions, 111 deletions
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index 68b9ea86b86c..af0e558f231c 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -3,7 +3,7 @@ * * Module Name: cmfsize - Common get file size function * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 6a0cdba6fdfd..3d63626d80e7 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -3,7 +3,7 @@ * * Module Name: getopt * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 9d70d8c945af..9741e7503591 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -3,7 +3,7 @@ * * Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ @@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl") typedef struct osl_table_info { struct osl_table_info *next; u32 instance; - char signature[ACPI_NAMESEG_SIZE]; + char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; } osl_table_info; diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index 39f3bffd9355..b9bb83116549 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -3,7 +3,7 @@ * * Module Name: osunixdir - Unix directory access interfaces * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 2b7d56252684..b93ebc9371a5 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -3,7 +3,7 @@ * * Module Name: osunixmap - Unix OSL for file mappings * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 46429417c71a..36f27491713c 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -3,7 +3,7 @@ * * Module Name: osunixxf - UNIX OSL interfaces * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index 643e3e722340..fe0d23c4f2ed 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -3,7 +3,7 @@ * * Module Name: acpidump.h - Include file for acpi_dump utility * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 0742b00b61a1..bf30143efbdc 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -3,7 +3,7 @@ * * Module Name: apdump - Dump routines for ACPI tables (acpidump) * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 13817f9112c0..75db0091e275 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -3,7 +3,7 @@ * * Module Name: apfiles - File-related functions for acpidump utility * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ @@ -103,7 +103,7 @@ int ap_open_output_file(char *pathname) int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) { - char filename[ACPI_NAMESEG_SIZE + 16]; + char filename[ACPI_NAMESEG_SIZE + 16] ACPI_NONSTRING; char instance_str[16]; ACPI_FILE file; acpi_size actual; diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 666a9675e743..9f3850e3af5b 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -3,7 +3,7 @@ * * Module Name: apmain - Main module for the acpidump utility * - * Copyright (C) 2000 - 2023, Intel Corp. + * Copyright (C) 2000 - 2025, Intel Corp. * *****************************************************************************/ diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 51a95239fe06..be8dfac14076 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -2,6 +2,7 @@ # Makefile for cpupower # # Copyright (C) 2005,2006 Dominik Brodowski <linux@dominikbrodowski.net> +# Copyright (C) 2025 Francesco Poli <invernomuto@paranoici.org> # # Based largely on the Makefile for udev by: # @@ -52,8 +53,11 @@ DESTDIR ?= # and _should_ modify the PACKAGE_BUGREPORT definition VERSION:= $(shell ./utils/version-gen.sh) -LIB_MAJ= 0.0.1 -LIB_MIN= 1 +LIB_FIX= 1 +LIB_MIN= 0 +LIB_MAJ= 1 +LIB_VER= $(LIB_MAJ).$(LIB_MIN).$(LIB_FIX) + PACKAGE = cpupower PACKAGE_BUGREPORT = linux-pm@vger.kernel.org @@ -68,6 +72,7 @@ bindir ?= /usr/bin sbindir ?= /usr/sbin mandir ?= /usr/man libdir ?= /usr/lib +libexecdir ?= /usr/libexec includedir ?= /usr/include localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower @@ -80,6 +85,7 @@ CP = cp -fpR INSTALL = /usr/bin/install -c INSTALL_PROGRAM = ${INSTALL} INSTALL_DATA = ${INSTALL} -m 644 +SETPERM_DATA = chmod 644 #bash completion scripts get sourced and so they should be rw only. INSTALL_SCRIPT = ${INSTALL} -m 644 @@ -200,14 +206,14 @@ $(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS) $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c -$(OUTPUT)libcpupower.so.$(LIB_MAJ): $(LIB_OBJS) +$(OUTPUT)libcpupower.so.$(LIB_VER): $(LIB_OBJS) $(ECHO) " LD " $@ $(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \ - -Wl,-soname,libcpupower.so.$(LIB_MIN) $(LIB_OBJS) + -Wl,-soname,libcpupower.so.$(LIB_MAJ) $(LIB_OBJS) @ln -sf $(@F) $(OUTPUT)libcpupower.so - @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MIN) + @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MAJ) -libcpupower: $(OUTPUT)libcpupower.so.$(LIB_MAJ) +libcpupower: $(OUTPUT)libcpupower.so.$(LIB_VER) # Let all .o files depend on its .c file and all headers # Might be worth to put this into utils/Makefile at some point of time @@ -217,7 +223,7 @@ $(OUTPUT)%.o: %.c $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c -$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ) +$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_VER) $(ECHO) " CC " $@ ifeq ($(strip $(STATIC)),true) $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@ @@ -262,7 +268,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot done; endif -compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ) +compile-bench: $(OUTPUT)libcpupower.so.$(LIB_VER) @V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) # we compile into subdirectories. if the target directory is not the @@ -299,6 +305,13 @@ install-tools: $(OUTPUT)cpupower $(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir} $(INSTALL) -d $(DESTDIR)${bash_completion_dir} $(INSTALL_SCRIPT) cpupower-completion.sh '$(DESTDIR)${bash_completion_dir}/cpupower' + $(INSTALL) -d $(DESTDIR)${confdir} + $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}' + $(INSTALL) -d $(DESTDIR)${libexecdir} + $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower' + $(INSTALL) -d $(DESTDIR)${libdir}/systemd/system + sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${libdir}/systemd/system/cpupower.service' + $(SETPERM_DATA) '$(DESTDIR)${libdir}/systemd/system/cpupower.service' install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 @@ -333,6 +346,9 @@ uninstall: - rm -f $(DESTDIR)${includedir}/cpufreq.h - rm -f $(DESTDIR)${includedir}/cpuidle.h - rm -f $(DESTDIR)${bindir}/utils/cpupower + - rm -f $(DESTDIR)${confdir}cpupower-service.conf + - rm -f $(DESTDIR)${libexecdir}/cpupower + - rm -f $(DESTDIR)${libdir}/systemd/system/cpupower.service - rm -f $(DESTDIR)${mandir}/man1/cpupower.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1 diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README index 2678ed81d311..9de449469568 100644 --- a/tools/power/cpupower/README +++ b/tools/power/cpupower/README @@ -59,6 +59,10 @@ $ sudo make install ----------------------------------------------------------------------- | man pages | /usr/man | ----------------------------------------------------------------------- +| systemd service | /usr/lib/systemd/system | +----------------------------------------------------------------------- +| systemd support script | /usr/libexec | +----------------------------------------------------------------------- To put it in other words it makes build results available system-wide, enabling any user to simply start using it without any additional steps @@ -109,6 +113,10 @@ The files will be installed to the following dirs: ----------------------------------------------------------------------- | man pages | ${DESTDIR}/usr/man | ----------------------------------------------------------------------- +| systemd service | ${DESTDIR}/usr/lib/systemd/system | +----------------------------------------------------------------------- +| systemd support script | ${DESTDIR}/usr/libexec | +----------------------------------------------------------------------- If you look at the table for the default 'make' output dirs you will notice that the only difference with the non-default case is the @@ -173,6 +181,26 @@ The issue is that binary cannot find the 'libcpupower' library. So, we shall point to the lib dir: sudo LD_LIBRARY_PATH=lib64/ ./bin/cpupower +systemd service +--------------- + +A systemd service is also provided to run the cpupower utility at boot with +settings read from a configuration file. + +If you want systemd to find the new service after the installation, the service +unit must have been installed in one of the system unit search path directories +(such as '/usr/lib/systemd/system/', which is the default location) and (unless +you are willing to wait for the next reboot) you need to issue the following +command: + +$ sudo systemctl daemon-reload + +If you want to enable this systemd service, edit '/etc/cpupower-service.conf' +(uncommenting at least one of the options, depending on your preferences) +and then issue the following command: + +$ sudo systemctl enable --now cpupower.service + THANKS ------ diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c index 080678d9d74e..bd67c758b33a 100644 --- a/tools/power/cpupower/bench/parse.c +++ b/tools/power/cpupower/bench/parse.c @@ -121,6 +121,10 @@ out_dir: struct config *prepare_default_config() { struct config *config = malloc(sizeof(struct config)); + if (!config) { + perror("malloc"); + return NULL; + } dprintf("loading defaults\n"); diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile index 741f21477432..81db39a03efb 100644 --- a/tools/power/cpupower/bindings/python/Makefile +++ b/tools/power/cpupower/bindings/python/Makefile @@ -1,22 +1,20 @@ # SPDX-License-Identifier: GPL-2.0-only # Makefile for libcpupower's Python bindings # -# This Makefile expects you have already run the makefile for cpupower to build -# the .o files in the lib directory for the bindings to be created. +# This Makefile expects you have already run `make install-lib` in the lib +# directory for the bindings to be created. CC := gcc HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi) HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi) -LIB_DIR := ../../lib PY_INCLUDE = $(firstword $(shell python-config --includes)) -OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o) INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])") all: _raw_pylibcpupower.so _raw_pylibcpupower.so: raw_pylibcpupower_wrap.o - $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so + $(CC) -shared -lcpupower raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE) diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README index 952e2e02fd32..2a4896b648b7 100644 --- a/tools/power/cpupower/bindings/python/README +++ b/tools/power/cpupower/bindings/python/README @@ -5,18 +5,21 @@ libcpupower (aside from the libcpupower object files). requirements ------------ -* You need the object files in the libcpupower directory compiled by -cpupower's makefile. +* If you are building completely from upstream; please install libcpupower by +running `make install-lib` within the cpupower directory. This installs the +libcpupower.so file and symlinks needed. Otherwise, please make sure a symlink +to libcpupower.so exists in your library path from your distribution's +packages. * The SWIG program must be installed. -* The Python's development libraries installed. +* The Python's development libraries must be installed. Please check that your version of SWIG is compatible with the version of Python installed on your machine by checking the SWIG changelog on their website. https://swig.org/ Note that while SWIG itself is GPL v3+ licensed; the resulting output, -the bindings code: is permissively licensed + the license of libcpupower's .o -files. For these bindings that means GPL v2. +the bindings code: is permissively licensed + the license of libcpupower's +library files. For these bindings that means GPL v2. Please see https://swig.org/legal.html and the discussion [1] for more details. diff --git a/tools/power/cpupower/cpupower-service.conf b/tools/power/cpupower/cpupower-service.conf new file mode 100644 index 000000000000..02eabe8e3614 --- /dev/null +++ b/tools/power/cpupower/cpupower-service.conf @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2012, Sébastien Luttringer +# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org> + +# Configuration file for cpupower.service systemd service unit +# +# Edit this file (uncommenting at least one of the options, depending on +# your preferences) and then enable cpupower.service, if you want cpupower +# to run at boot with these settings. + +# --- CPU clock frequency --- + +# Define CPU governor +# Valid governors: ondemand, performance, powersave, conservative, userspace +#GOVERNOR='ondemand' + +# Limit frequency range +# Valid suffixes: Hz, kHz (default), MHz, GHz, THz +#MIN_FREQ="2.25GHz" +#MAX_FREQ="3GHz" + +# Set a specific frequency +# Requires userspace governor to be available. +# If this option is set, all the previous frequency options are ignored +#FREQ= + +# --- CPU policy --- + +# Set a register on supported Intel processore which allows software to convey +# its policy for the relative importance of performance versus energy savings to +# the processor. See man CPUPOWER-SET(1) for additional details +#PERF_BIAS= diff --git a/tools/power/cpupower/cpupower.service.in b/tools/power/cpupower/cpupower.service.in new file mode 100644 index 000000000000..fbd5b8c14270 --- /dev/null +++ b/tools/power/cpupower/cpupower.service.in @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2012-2020, Sébastien Luttringer +# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org> + +[Unit] +Description=Apply cpupower configuration +ConditionVirtualization=!container + +[Service] +Type=oneshot +EnvironmentFile=-___CDIR___cpupower-service.conf +ExecStart=___LDIR___/cpupower +RemainAfterExit=yes + +[Install] +WantedBy=multi-user.target diff --git a/tools/power/cpupower/cpupower.sh b/tools/power/cpupower/cpupower.sh new file mode 100644 index 000000000000..a37dd4cfdb2b --- /dev/null +++ b/tools/power/cpupower/cpupower.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2012, Sébastien Luttringer +# Copyright (C) 2024, Francesco Poli <invernomuto@paranoici.org> + +ESTATUS=0 + +# apply CPU clock frequency options +if test -n "$FREQ" +then + cpupower frequency-set -f "$FREQ" > /dev/null || ESTATUS=1 +elif test -n "${GOVERNOR}${MIN_FREQ}${MAX_FREQ}" +then + cpupower frequency-set \ + ${GOVERNOR:+ -g "$GOVERNOR"} \ + ${MIN_FREQ:+ -d "$MIN_FREQ"} ${MAX_FREQ:+ -u "$MAX_FREQ"} \ + > /dev/null || ESTATUS=1 +fi + +# apply CPU policy options +if test -n "$PERF_BIAS" +then + cpupower set -b "$PERF_BIAS" > /dev/null || ESTATUS=1 +fi + +exit $ESTATUS diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index 7a2ef691b20e..ce8dfb8e46ab 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -10,6 +10,7 @@ #include <stdio.h> #include <errno.h> #include <stdlib.h> +#include <string.h> #include "cpupower.h" #include "cpupower_intern.h" @@ -150,15 +151,25 @@ static int __compare(const void *t1, const void *t2) return 0; } +static int __compare_core_cpu_list(const void *t1, const void *t2) +{ + struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1; + struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2; + + return strcmp(top1->core_cpu_list, top2->core_cpu_list); +} + /* * Returns amount of cpus, negative on error, cpu_top must be * passed to cpu_topology_release to free resources * - * Array is sorted after ->pkg, ->core, then ->cpu + * Array is sorted after ->cpu_smt_list ->pkg, ->core */ int get_cpu_topology(struct cpupower_topology *cpu_top) { int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF); + char path[SYSFS_PATH_MAX]; + char *last_cpu_list; cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus); if (cpu_top->core_info == NULL) @@ -183,6 +194,34 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) cpu_top->core_info[cpu].core = -1; continue; } + if (cpu_top->core_info[cpu].core == -1) { + strncpy(cpu_top->core_info[cpu].core_cpu_list, "-1", CPULIST_BUFFER); + continue; + } + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", + cpu, "core_cpus_list"); + if (cpupower_read_sysfs( + path, + cpu_top->core_info[cpu].core_cpu_list, + CPULIST_BUFFER) < 1) { + printf("Warning CPU%u has a 0 size core_cpus_list string", cpu); + } + } + + /* Count the number of distinct cpu lists to get the physical core + * count. + */ + qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), + __compare_core_cpu_list); + + last_cpu_list = cpu_top->core_info[0].core_cpu_list; + cpu_top->cores = 1; + for (cpu = 1; cpu < cpus; cpu++) { + if (strcmp(cpu_top->core_info[cpu].core_cpu_list, last_cpu_list) != 0 && + cpu_top->core_info[cpu].pkg != -1) { + last_cpu_list = cpu_top->core_info[cpu].core_cpu_list; + cpu_top->cores++; + } } qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), @@ -203,13 +242,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) if (!(cpu_top->core_info[0].pkg == -1)) cpu_top->pkgs++; - /* Intel's cores count is not consecutively numbered, there may - * be a core_id of 3, but none of 2. Assume there always is 0 - * Get amount of cores by counting duplicates in a package - for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) { - if (cpu_top->core_info[cpu].core == 0) - cpu_top->cores++; - */ return cpus; } diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h index e4e4292eacec..2e67a080f203 100644 --- a/tools/power/cpupower/lib/cpupower.h +++ b/tools/power/cpupower/lib/cpupower.h @@ -2,6 +2,8 @@ #ifndef __CPUPOWER_CPUPOWER_H__ #define __CPUPOWER_CPUPOWER_H__ +#define CPULIST_BUFFER 5 + struct cpupower_topology { /* Amount of CPU cores, packages and threads per core in the system */ unsigned int cores; @@ -16,6 +18,7 @@ struct cpuid_core_info { int pkg; int core; int cpu; + char core_cpu_list[CPULIST_BUFFER]; /* flags */ unsigned int is_online:1; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index f746099b5dac..ad493157f826 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -6,6 +6,7 @@ */ +#include <errno.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> @@ -91,7 +92,11 @@ int fill_string_with_spaces(char *s, int n) return 0; } -#define MAX_COL_WIDTH 6 +#define MAX_COL_WIDTH 6 +#define TOPOLOGY_DEPTH_PKG 3 +#define TOPOLOGY_DEPTH_CORE 2 +#define TOPOLOGY_DEPTH_CPU 1 + void print_header(int topology_depth) { int unsigned mon; @@ -113,12 +118,19 @@ void print_header(int topology_depth) } printf("\n"); - if (topology_depth > 2) + switch (topology_depth) { + case TOPOLOGY_DEPTH_PKG: printf(" PKG|"); - if (topology_depth > 1) + break; + case TOPOLOGY_DEPTH_CORE: printf("CORE|"); - if (topology_depth > 0) + break; + case TOPOLOGY_DEPTH_CPU: printf(" CPU|"); + break; + default: + return; + } for (mon = 0; mon < avail_monitors; mon++) { if (mon != 0) @@ -152,12 +164,19 @@ void print_results(int topology_depth, int cpu) cpu_top.core_info[cpu].pkg == -1) return; - if (topology_depth > 2) + switch (topology_depth) { + case TOPOLOGY_DEPTH_PKG: printf("%4d|", cpu_top.core_info[cpu].pkg); - if (topology_depth > 1) + break; + case TOPOLOGY_DEPTH_CORE: printf("%4d|", cpu_top.core_info[cpu].core); - if (topology_depth > 0) + break; + case TOPOLOGY_DEPTH_CPU: printf("%4d|", cpu_top.core_info[cpu].cpu); + break; + default: + return; + } for (mon = 0; mon < avail_monitors; mon++) { if (mon != 0) @@ -294,7 +313,10 @@ int fork_it(char **argv) if (!child_pid) { /* child */ - execvp(argv[0], argv); + if (execvp(argv[0], argv) == -1) { + printf("Invalid monitor command %s\n", argv[0]); + exit(errno); + } } else { /* parent */ if (child_pid == -1) { @@ -423,11 +445,13 @@ int cmd_monitor(int argc, char **argv) if (avail_monitors == 0) { printf(_("No HW Cstate monitors found\n")); + cpu_topology_release(cpu_top); return 1; } if (mode == list) { list_monitors(); + cpu_topology_release(cpu_top); exit(EXIT_SUCCESS); } @@ -448,15 +472,15 @@ int cmd_monitor(int argc, char **argv) /* ToDo: Topology parsing needs fixing first to do this more generically */ if (cpu_top.pkgs > 1) - print_header(3); + print_header(TOPOLOGY_DEPTH_PKG); else - print_header(1); + print_header(TOPOLOGY_DEPTH_CPU); for (cpu = 0; cpu < cpu_count; cpu++) { if (cpu_top.pkgs > 1) - print_results(3, cpu); + print_results(TOPOLOGY_DEPTH_PKG, cpu); else - print_results(1, cpu); + print_results(TOPOLOGY_DEPTH_CPU, cpu); } for (num = 0; num < avail_monitors; num++) { diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg index 4f80ad7d7275..0321b59518f3 100644 --- a/tools/power/pm-graph/config/custom-timeline-functions.cfg +++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg @@ -122,13 +122,13 @@ freeze_processes: freeze_kernel_threads: pm_restrict_gfp_mask: acpi_suspend_begin: -suspend_console: +console_suspend_all: acpi_pm_prepare: syscore_suspend: arch_enable_nonboot_cpus_end: syscore_resume: acpi_pm_finish: -resume_console: +console_resume_all: acpi_pm_end: pm_restore_gfp_mask: thaw_processes: diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 918eae58b0b4..1555b51a7d55 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -210,13 +210,13 @@ class SystemValues: 'hibernate_preallocate_memory': {}, 'create_basic_memory_bitmaps': {}, 'swsusp_write': {}, - 'suspend_console': {}, + 'console_suspend_all': {}, 'acpi_pm_prepare': {}, 'syscore_suspend': {}, 'arch_enable_nonboot_cpus_end': {}, 'syscore_resume': {}, 'acpi_pm_finish': {}, - 'resume_console': {}, + 'console_resume_all': {}, 'acpi_pm_end': {}, 'pm_restore_gfp_mask': {}, 'thaw_processes': {}, @@ -3459,7 +3459,7 @@ def parseTraceLog(live=False): tracewatch = ['irq_wakeup'] if sysvals.usekprobes: tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend', - 'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', + 'syscore_resume', 'console_resume_all', 'thaw_processes', 'CPU_ON', 'CPU_OFF', 'acpi_suspend'] # extract the callgraph and traceevent data @@ -4017,7 +4017,8 @@ def parseKernelLog(data): 'PM: early restore of devices complete after.*'], 'resume_complete': ['PM: resume of devices complete after.*', 'PM: restore of devices complete after.*'], - 'post_resume': [r'.*Restarting tasks \.\.\..*'], + 'post_resume': [r'.*Restarting tasks \.\.\..*', + 'Done restarting tasks.*'], } # action table (expected events that occur and show up in dmesg) diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile index 7221f2f55e8b..8d3a02a20f3d 100644 --- a/tools/power/x86/intel-speed-select/Makefile +++ b/tools/power/x86/intel-speed-select/Makefile @@ -13,7 +13,7 @@ endif # Do not use make's built-in rules # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3 +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(shell $(CC) -print-sysroot)/usr/include/libnl3 override LDFLAGS += -lnl-genl-3 -lnl-3 ALL_TARGETS := intel-speed-select diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index fadfb02b8611..0ce251b8d466 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.21"; +static const char *version_str = "v1.23"; static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; @@ -26,6 +26,7 @@ static FILE *outf; static int cpu_model; static int cpu_stepping; +static int extended_family; #define MAX_CPUS_IN_ONE_REQ 512 static short max_target_cpus; @@ -46,8 +47,9 @@ static int force_online_offline; static int auto_mode; static int fact_enable_fail; static int cgroupv2; +static int max_pkg_id; static int max_die_id; -static int max_punit_id; +static int max_die_id_package_0; /* clos related */ static int current_clos = -1; @@ -142,6 +144,14 @@ int is_icx_platform(void) return 0; } +static int is_dmr_plus_platform(void) +{ + if (extended_family == 0x04) + return 1; + + return 0; +} + static int update_cpu_model(void) { unsigned int ebx, ecx, edx; @@ -149,6 +159,7 @@ static int update_cpu_model(void) __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; + extended_family = (fms >> 20) & 0x0f; cpu_model = (fms >> 4) & 0xf; if (family == 6 || family == 0xf) cpu_model += ((fms >> 16) & 0xf) << 4; @@ -557,6 +568,8 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void if (id.pkg < 0 || id.die < 0 || id.punit < 0) continue; + id.die = id.die % (max_die_id_package_0 + 1); + valid_mask[id.pkg][id.die] = 1; if (cpus[id.pkg][id.die][id.punit] == -1) @@ -564,11 +577,11 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void } for (i = 0; i < MAX_PACKAGE_COUNT; i++) { - if (max_die_id == max_punit_id) { + if (max_die_id > max_pkg_id) { for (k = 0; k < MAX_PUNIT_PER_DIE && k < MAX_DIE_PER_PACKAGE; k++) { id.cpu = cpus[i][k][k]; id.pkg = i; - id.die = k; + id.die = get_physical_die_id(id.cpu); id.punit = k; if (isst_is_punit_valid(&id)) callback(&id, arg1, arg2, arg3, arg4); @@ -586,7 +599,10 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void for (k = 0; k < MAX_PUNIT_PER_DIE; k++) { id.cpu = cpus[i][j][k]; id.pkg = i; - id.die = j; + if (id.cpu >= 0) + id.die = get_physical_die_id(id.cpu); + else + id.die = id.pkg; id.punit = k; if (isst_is_punit_valid(&id)) callback(&id, arg1, arg2, arg3, arg4); @@ -788,6 +804,8 @@ static void create_cpu_map(void) cpu_map[i].die_id = die_id; cpu_map[i].core_id = core_id; + if (max_pkg_id < pkg_id) + max_pkg_id = pkg_id; punit_id = 0; @@ -812,8 +830,8 @@ static void create_cpu_map(void) if (max_die_id < die_id) max_die_id = die_id; - if (max_punit_id < cpu_map[i].punit_id) - max_punit_id = cpu_map[i].punit_id; + if (!pkg_id && max_die_id_package_0 < die_id) + max_die_id_package_0 = die_id; debug_printf( "map logical_cpu:%d core: %d die:%d pkg:%d punit:%d punit_cpu:%d punit_core:%d\n", @@ -1509,7 +1527,8 @@ display_result: usleep(2000); /* Adjusting uncore freq */ - isst_adjust_uncore_freq(id, tdp_level, &ctdp_level); + if (!is_dmr_plus_platform()) + isst_adjust_uncore_freq(id, tdp_level, &ctdp_level); fprintf(stderr, "Option is set to online/offline\n"); ctdp_level.core_cpumask_size = diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c index da53aaa27fc9..4f389e1c0525 100644 --- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c +++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c @@ -227,6 +227,7 @@ static int tpmi_get_ctdp_control(struct isst_id *id, int config_index, static int tpmi_get_tdp_info(struct isst_id *id, int config_index, struct isst_pkg_ctdp_level_info *ctdp_level) { + struct isst_perf_level_fabric_info fabric_info; struct isst_perf_level_data_info info; int ret; @@ -253,6 +254,17 @@ static int tpmi_get_tdp_info(struct isst_id *id, int config_index, ctdp_level->uncore_p1 = info.p1_fabric_freq_mhz; ctdp_level->uncore_pm = info.pm_fabric_freq_mhz; + fabric_info.socket_id = id->pkg; + fabric_info.power_domain_id = id->punit; + fabric_info.level = config_index; + + ret = tpmi_process_ioctl(ISST_IF_GET_PERF_LEVEL_FABRIC_INFO, &fabric_info); + if (ret != -1) { + ctdp_level->uncore1_p0 = fabric_info.p0_fabric_freq_mhz[1]; + ctdp_level->uncore1_p1 = fabric_info.p1_fabric_freq_mhz[1]; + ctdp_level->uncore1_pm = fabric_info.pm_fabric_freq_mhz[1]; + } + debug_printf ("cpu:%d ctdp:%d CONFIG_TDP_GET_TDP_INFO tdp_ratio:%d pkg_tdp:%d ctdp_level->t_proc_hot:%d\n", id->cpu, config_index, ctdp_level->tdp_ratio, ctdp_level->pkg_tdp, diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 07ebd08f3202..e4884eb02837 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -173,7 +173,11 @@ static int print_package_info(struct isst_id *id, FILE *outf) if (out_format_is_json()) { if (api_version() > 1) { - if (id->cpu < 0) + if (id->die < 0 && id->cpu < 0) + snprintf(header, sizeof(header), + "package-%d:die-IO:powerdomain-%d:cpu-None", + id->pkg, id->punit); + else if (id->cpu < 0) snprintf(header, sizeof(header), "package-%d:die-%d:powerdomain-%d:cpu-None", id->pkg, id->die, id->punit); @@ -190,7 +194,10 @@ static int print_package_info(struct isst_id *id, FILE *outf) } snprintf(header, sizeof(header), "package-%d", id->pkg); format_and_print(outf, level++, header, NULL); - snprintf(header, sizeof(header), "die-%d", id->die); + if (id->die < 0) + snprintf(header, sizeof(header), "die-IO"); + else + snprintf(header, sizeof(header), "die-%d", id->die); format_and_print(outf, level++, header, NULL); if (api_version() > 1) { snprintf(header, sizeof(header), "powerdomain-%d", id->punit); @@ -453,6 +460,26 @@ void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level format_and_print(outf, level + 2, header, value); } + if (ctdp_level->uncore1_p1) { + snprintf(header, sizeof(header), "uncore-1-frequency-base(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_p1 * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + if (ctdp_level->uncore1_pm) { + snprintf(header, sizeof(header), "uncore-1-frequency-min(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_pm * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->uncore1_p0) { + snprintf(header, sizeof(header), "uncore-1-frequency-max(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore1_p0 * isst_get_disp_freq_multiplier()); + format_and_print(outf, level + 2, header, value); + } + if (ctdp_level->mem_freq) { snprintf(header, sizeof(header), "max-mem-frequency(MHz)"); snprintf(value, sizeof(value), "%d", diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 39ee75677c2c..960f647cfc2d 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -147,6 +147,9 @@ struct isst_pkg_ctdp_level_info { int uncore_p0; int uncore_p1; int uncore_pm; + int uncore1_p0; + int uncore1_p1; + int uncore1_pm; int sse_p1; int avx2_p1; int avx512_p1; diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 99bf905ade81..b74ed916057e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -158,16 +158,22 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP -\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. +\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default. +.PP +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default. +.PP +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default. .PP \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. .PP \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. .PP +\fBCoreThr\fP Core Thermal Throttling events during the measurement interval. Note that events since boot can be find in /sys/devices/system/cpu/cpu*/thermal_throttle/* +.PP \fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used. .PP \fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used. @@ -199,6 +205,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBUncMHz\fP per-package uncore MHz, instantaneous sample. .PP \fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages. +Intel Granite Rapids systems use domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0. +For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group. .SH TOO MUCH INFORMATION EXAMPLE By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8d5011a0bf60..0170d3cc6819 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -153,7 +153,7 @@ struct msr_counter bic[] = { { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, - { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, @@ -206,6 +206,7 @@ struct msr_counter bic[] = { { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -219,7 +220,7 @@ struct msr_counter bic[] = { #define BIC_TSC_MHz (1ULL << 7) #define BIC_IRQ (1ULL << 8) #define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) +#define BIC_cpuidle (1ULL << 10) #define BIC_CPU_c1 (1ULL << 11) #define BIC_CPU_c3 (1ULL << 12) #define BIC_CPU_c6 (1ULL << 13) @@ -272,17 +273,20 @@ struct msr_counter bic[] = { #define BIC_Sys_J (1ULL << 60) #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) - -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_pct_idle (1ULL << 63) + +#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) +#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) +#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) +#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) +#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) @@ -1056,7 +1060,7 @@ static const struct platform_data turbostat_pdata[] = { * Missing support for * INTEL_ICELAKE * INTEL_ATOM_SILVERMONT_MID - * INTEL_ATOM_AIRMONT_MID + * INTEL_ATOM_SILVERMONT_MID2 * INTEL_ATOM_AIRMONT_NP */ { 0, NULL }, @@ -1121,7 +1125,7 @@ end: int backwards_count; char *progname; -#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ +#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */ cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_THREAD_COUNTERS 24 @@ -2211,7 +2215,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) return 0; } -int probe_msr(int cpu, off_t offset) +int probe_rapl_msr(int cpu, off_t offset, int index) { ssize_t retval; unsigned long long value; @@ -2220,13 +2224,22 @@ int probe_msr(int cpu, off_t offset) retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset); - /* - * Expect MSRs to accumulate some non-zero value since the system was powered on. - * Treat zero as a read failure. - */ - if (retval != sizeof(value) || value == 0) + /* if the read failed, the probe fails */ + if (retval != sizeof(value)) return 1; + /* If an Energy Status Counter MSR returns 0, the probe fails */ + switch (index) { + case RAPL_RCI_INDEX_ENERGY_PKG: + case RAPL_RCI_INDEX_ENERGY_CORES: + case RAPL_RCI_INDEX_DRAM: + case RAPL_RCI_INDEX_GFX: + case RAPL_RCI_INDEX_ENERGY_PLATFORM: + if (value == 0) + return 1; + } + + /* PKG,DRAM_PERF_STATUS MSRs, can return any value */ return 0; } @@ -2345,16 +2358,25 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) retval |= ~0; break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_TOPOLOGY; + retval |= BIC_GROUP_TOPOLOGY; break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_THERMAL_PWR; + retval |= BIC_GROUP_THERMAL_PWR; break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_IDLE; + retval |= BIC_GROUP_IDLE; + break; + } else if (!strcmp(name_list, "swidle")) { + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "hwidle")) { + retval |= BIC_GROUP_HW_IDLE; break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_FREQUENCY; + retval |= BIC_GROUP_FREQUENCY; break; } else if (!strcmp(name_list, "other")) { retval |= BIC_OTHER; @@ -2363,6 +2385,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } if (i == MAX_BIC) { + fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { @@ -3476,7 +3499,7 @@ void delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; - old->core_throt_cnt = new->core_throt_cnt; + old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt; old->mc6_us = new->mc6_us - old->mc6_us; DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); @@ -6030,6 +6053,7 @@ int snapshot_graphics(int idx) int retval; rewind(gfx_info[idx].fp); + fflush(gfx_info[idx].fp); switch (idx) { case GFX_rc6: @@ -6703,7 +6727,18 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/current_freq_khz", path_base); sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + /* + * Once add_couter() is called, that counter is always read + * and reported -- So it is effectively (enabled & present). + * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz) + * is (enabled). Since we are in this routine, we + * know we will not probe and set (present) the legacy counter. + * + * This allows "--show/--hide UncMHz" to be effective for + * the clustered MHz counters, as a group. + */ + if BIC_IS_ENABLED(BIC_UNCORE_MHZ) + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); if (quiet) continue; @@ -7896,7 +7931,7 @@ void rapl_perf_init(void) rci->flags[cai->rci_index] = cai->flags; /* Use MSR for this counter */ - } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; rci->msr[cai->rci_index] = cai->msr; rci->msr_mask[cai->rci_index] = cai->msr_mask; @@ -8034,7 +8069,7 @@ void msr_perf_init_(void) cai->present = true; /* User MSR for this counter */ - } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; cci->msr_mask[cai->rci_index] = cai->msr_mask; @@ -8148,7 +8183,7 @@ void cstate_perf_init_(bool soft_c1) /* User MSR for this counter */ } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit - && probe_msr(cpu, cai->msr) == 0) { + && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } @@ -9559,7 +9594,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2025.04.06 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -9592,7 +9627,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) for (mp = head; mp; mp = mp->next) { if (debug) fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); - if (!strncmp(name, mp->name, strlen(mp->name))) + if (!strcmp(name, mp->name)) return mp; } return NULL; @@ -10239,14 +10274,18 @@ int is_deferred_skip(char *name) return 0; } -void probe_sysfs(void) +void probe_cpuidle_residency(void) { char path[64]; char name_buf[16]; FILE *input; int state; + int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_pct_idle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10269,14 +10308,32 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/time", state); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) continue; add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); + + if (state > max_state) + max_state = state; + if (state < min_state) + min_state = state; } +} + +void probe_cpuidle_counts(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + int min_state = 1024, max_state = 0; + char *sp; + + if (!DO_BIC(BIC_cpuidle)) + return; for (state = 10; state >= 0; --state) { @@ -10286,26 +10343,52 @@ void probe_sysfs(void) continue; if (!fgets(name_buf, sizeof(name_buf), input)) err(1, "%s: failed to read file", path); - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ - sp = strchr(name_buf, '-'); - if (!sp) - sp = strchrnul(name_buf, '\n'); - *sp = '\0'; fclose(input); remove_underbar(name_buf); - sprintf(path, "cpuidle/state%d/usage", state); - - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) continue; + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + + /* + * The 'below' sysfs file always contains 0 for the deepest state (largest index), + * do not add it. + */ + if (state != max_state) { + /* + * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for + * the last state, so do not add it. + */ + + *sp = '+'; + *(sp + 1) = '\0'; + sprintf(path, "cpuidle/state%d/below", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } + + *sp = '\0'; + sprintf(path, "cpuidle/state%d/usage", state); add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); - } + /* + * The 'above' sysfs file always contains 0 for the shallowest state (smallest + * index), do not add it. + */ + if (state != min_state) { + *sp = '-'; + *(sp + 1) = '\0'; + sprintf(path, "cpuidle/state%d/above", state); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0); + } + } } /* @@ -10549,7 +10632,8 @@ skip_cgroup_setting: print_bootcmd(); } - probe_sysfs(); + probe_cpuidle_residency(); + probe_cpuidle_counts(); if (!getuid()) set_rlimit(); |