Added support for Zen/Zen3 RAPL package power sensor.

This commit is contained in:
moosecrap 2025-02-27 20:35:24 -08:00
parent 5319945f08
commit 4bb85c461e
6 changed files with 180 additions and 30 deletions

View File

@ -1,7 +1,7 @@
.PHONY : clean debug
OBJS = evga-icx.o evga-card.o icx3.o board-sensors.o
LDLIBS = -li2c
OBJS = evga-icx.o evga-card.o icx3.o board-sensors.o zen3-rapl.o
LDLIBS = -li2c -lm
CFLAGS = -MD
ifdef USE_NVML

View File

@ -56,6 +56,7 @@ Available options:
--overwrite : Overwrite previously displayed info with --watch and --compact instead of continuously logging new lines
--color : Print headers in color in --compact mode for better readability
--no-reasons : Do not query NVML for clock reasons (can cause stuttering)
--board : Also print temperatures from the CPU, motherboard, and other sensors
```
### Examples:

View File

@ -1,3 +1,6 @@
#ifndef BOARD_SENSORS_H
#define BOARD_SENSORS_H
#include <stdio.h>
#define MAX_SENSOR_NAME_LENGTH 256
@ -25,6 +28,7 @@ static struct hwmon_sensor hwmon_sensor_info[] =
{"zenpower", "temp8", "CPU ", "CCD", "°C", 1000.0, -40.0}, /* Tccd6 */
{"zenpower", "temp9", "CPU ", "CCD", "°C", 1000.0, -40.0}, /* Tccd7 */
{"zenpower", "temp10", "CPU ", "CCD", "°C", 1000.0, -40.0}, /* Tccd8 */
{"zen-rapl", "", "CPU ", "POW", " W", 0.0, 0.0}, /* Zen RAPL placeholder */
{"asusec", "temp1", "Motherboard ", "CHIP", "°C", 1000.0, -40.0}, /* Chipset */
{"asusec", "temp5", "Motherboard ", "VRM", "°C", 1000.0, -40.0}, /* VRM */
{"asusec", "temp3", "", "MOBO", "°C", 1000.0, -40.0}, /* Motherboard */
@ -42,4 +46,6 @@ struct hwmon_avail_sensor {
};
int find_board_sensors(struct hwmon_avail_sensor *board_sensors, int max_sensors);
int get_sensor_reading(struct hwmon_avail_sensor *sensor, float *reading);
int get_sensor_reading(struct hwmon_avail_sensor *sensor, float *reading);
#endif

View File

@ -16,14 +16,19 @@
#include "icx3.h"
#include "evga-card.h"
#include "board-sensors.h"
#include "zen3-rapl.h"
#define MAX_GPUS 16
#define MAX_BOARD_SENSORS 256
#define HEADER_COLOR_START "\x1b[36m"
#define HEADER_COLOR_END "\x1b[39m"
char *header_start = "";
char *header_end = "";
static char *header_start = "";
static char *header_end = "";
static int zen3_rapl_sensor = -1; /* Board sensor number for the RAPL sensor */
static int compact = 0; /* Compact one-line per GPU display */
static int no_reasons = 0; /* Don't probe or display NVML clock reasons */
static const char helpstring[] = "Available options:\n"
"--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)\n"
@ -43,20 +48,18 @@ static const char helpstring[] = "Available options:\n"
"--no-reasons : Do not query NVML for clock reasons (can cause stuttering)\n"
"--board : Also print temperatures from the CPU, motherboard, and other sensors";
void print_gpu_info(int gpu_num, struct card_info gpus[], int compact, int no_reasons);
void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors, int compact);
void print_gpu_info(int gpu_num, struct card_info gpus[]);
void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors);
int main (int argc, char **argv)
{
struct card_info gpus[MAX_GPUS];
struct hwmon_avail_sensor board_sensors[MAX_BOARD_SENSORS];
int gpu_count, board_sensor_count;
int print_info = 0;
int compact = 0;
int print_info = 0;
int gpu_num = -1; /* Card to control */
int i2c_bus = -1; /* Specific i2c bus to probe instead of all */
int overwrite = 0; /* Overwrite printed console info in compact mode */
int no_reasons = 0; /* Don't probe or display NVML clock reasons */
int overwrite = 0; /* Overwrite printed console info in compact mode */
unsigned int watch = 0; /* Refresh display every this many seconds */
int print_board_sensors = 0; /* Print CPU/motherbord/other sensors as well */
char *fan_speed[ICX3_MAX_FANS] = {NULL};
@ -159,8 +162,14 @@ int main (int argc, char **argv)
}
/* Scan for motherboard/CPU/other sensors */
if (print_board_sensors)
if (print_board_sensors) {
board_sensor_count = find_board_sensors(board_sensors, MAX_BOARD_SENSORS);
if (init_rapl() && board_sensor_count < MAX_BOARD_SENSORS) {
board_sensors[board_sensor_count] = rapl_sensor;
zen3_rapl_sensor = board_sensor_count;
board_sensor_count++;
}
}
/* execute fan commands */
if (gpu_num == -1) {
@ -199,17 +208,17 @@ int main (int argc, char **argv)
printf("\x1b[K"); /* Clear current console line */
if (print_board_sensors)
print_board_info(board_sensors, board_sensor_count, compact);
print_board_info(board_sensors, board_sensor_count);
if (gpu_num == -1) {
/* No GPU specified on command line, loop over all supported GPUs */
for (int i = 0; i < gpu_count; i++){
if (i > 0)
printf("\n");
print_gpu_info(i, &gpus[i], compact, no_reasons);
print_gpu_info(i, &gpus[i]);
}
} else if (gpu_num <= gpu_count - 1) {
print_gpu_info(gpu_num, &gpus[gpu_num], compact, no_reasons);
print_gpu_info(gpu_num, &gpus[gpu_num]);
}
if (!overwrite)
@ -231,7 +240,7 @@ int main (int argc, char **argv)
#endif
}
void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors, int compact)
void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors)
{
int printed_sensors = 0;
int current_sort_index = 0;
@ -241,39 +250,56 @@ void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors,
char *last_short_name = NULL;
char *last_units = NULL;
while (printed_sensors < num_sensors) {
for (int i=0; i < num_sensors; i++){
float rapl_power;
for (int i = 0; i < (sizeof(hwmon_sensor_info) / sizeof(struct hwmon_sensor)); i++) {
/* Inject our Zen RAPL power reading here.
   zen3_rapl_sensor keeps its initial value of -1 unless main() appended the
   RAPL sensor to board_sensors[], so only write to the slot when it exists;
   otherwise this would store to board_sensors[-1]. */
if (zen3_rapl_sensor >= 0 && strcmp(hwmon_sensor_info[i].driver_name, "zen-rapl") == 0) {
    board_sensors[zen3_rapl_sensor].sort_index = current_sort_index;
    board_sensors[zen3_rapl_sensor].sensor_info = &hwmon_sensor_info[i];
}
for (int j=0; j < num_sensors; j++) {
struct hwmon_avail_sensor *current_sensor = &board_sensors[j];
/* Loop over all sensors, but only output those with the current sort index so they come out sort of sorted
Duplicates (e.g.) multiple NVMe will come out in whatever sort of order the directory listing happened to */
if (board_sensors[i].sort_index == current_sort_index) {
if (current_sensor->sort_index == current_sort_index) {
printed_sensors++;
good_reading = get_sensor_reading(&board_sensors[i], &current_reading);
if (j == zen3_rapl_sensor) {
good_reading = 1;
current_reading = get_rapl_package_power();
} else {
good_reading = get_sensor_reading(current_sensor, &current_reading);
}
if (!good_reading)
continue;
if (compact) {
/* Print units if needed */
if (last_units != NULL && strcmp(board_sensors[i].sensor_info->units, last_units))
if (last_units != NULL && strcmp(current_sensor->sensor_info->units, last_units))
printf("%s", last_units);
/* Print new section header if needed */
if (last_short_name == NULL || strcmp(board_sensors[i].sensor_info->short_name, last_short_name)) {
if (last_short_name == NULL || strcmp(current_sensor->sensor_info->short_name, last_short_name)) {
if (last_short_name != NULL) /* Spacer for all headings not the first one */
printf(" ");
printf("%s%s%s", header_start, board_sensors[i].sensor_info->short_name, header_end);
printf("%s%s%s", header_start, current_sensor->sensor_info->short_name, header_end);
}
printf(" %3.0f", current_reading);
last_short_name = board_sensors[i].sensor_info->short_name;
last_units = board_sensors[i].sensor_info->units;
last_short_name = current_sensor->sensor_info->short_name;
last_units = current_sensor->sensor_info->units;
} else {
printf("%s%s: %+.1f%s\n",
board_sensors[i].sensor_info->name_prefix,
board_sensors[i].sensor_name,
current_sensor->sensor_info->name_prefix,
current_sensor->sensor_name,
current_reading,
board_sensors[i].sensor_info->units);
current_sensor->sensor_info->units);
}
}
@ -287,7 +313,7 @@ void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors,
printf("\n");
}
void print_gpu_info(int gpu_num, struct card_info *gpu, int compact, int no_reasons)
void print_gpu_info(int gpu_num, struct card_info *gpu)
{
if (compact) {
/* One line per GPU */
@ -360,6 +386,5 @@ void print_gpu_info(int gpu_num, struct card_info *gpu, int compact, int no_reas
printf("\n");
#endif
}
}

112
zen3-rapl.c Normal file
View File

@ -0,0 +1,112 @@
#include <stdio.h>
#include <unistd.h>
#include <cpuid.h>
#include <math.h>
#include <string.h>
#include <fcntl.h>
#include "zen3-rapl.h"
/* CPUID vendor string that check_zen() requires before looking at the family */
#define AMD_STRING "AuthenticAMD"
/* CPU family values accepted by check_zen() */
#define ZEN_FAMILY 0x17
#define ZEN3_FAMILY 0x19
/* Seconds slept between the two energy counter samples in
   get_rapl_package_power(); also the divisor used to turn the energy
   difference into a power figure */
#define MEASUREMENT_TIME 0.25
/* Scale factor applied to the raw energy counter difference; set by
   init_rapl() from get_energy_unit(), 0 means "not available" */
static float energy_unit = 0;
/* Descriptor for the MSR device opened by init_rapl(); negative when not open */
static int msr_file = -1;
/* Most recent power value computed by get_rapl_package_power(), returned
   again whenever a new sample cannot be used */
static float package_power = 0.0;
/*
 * Report whether the CPU running this code is a supported AMD Zen part.
 *
 * Returns 1 when CPUID leaf 0 yields the vendor string AMD_STRING and the
 * family decoded from CPUID leaf 1 equals ZEN_FAMILY or ZEN3_FAMILY.
 * Returns 0 otherwise, including when either CPUID leaf is not available.
 */
static int check_zen(void)
{
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
    unsigned int base_family, family;
    char vendor[13];

    /* __get_cpuid() returns 0 and leaves the outputs untouched when the leaf
       is unsupported, so never decode registers from a failed query */
    if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
        return 0;

    /* The 12-byte vendor string is assembled from EBX, EDX, ECX in that order */
    memcpy(vendor, &ebx, 4);
    memcpy(vendor + 4, &edx, 4);
    memcpy(vendor + 8, &ecx, 4);
    vendor[12] = '\0';
    if (strcmp(vendor, AMD_STRING) != 0)
        return 0;

    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 0;

    /* EAX[11:8] is the base family and EAX[27:20] the extended family; the
       extended field only contributes when the base family is 0xF */
    base_family = (eax >> 8) & 0xF;
    family = base_family;
    if (base_family == 0xF)
        family += (eax >> 20) & 0xFF;

    return family == ZEN_FAMILY || family == ZEN3_FAMILY;
}
/*
 * Read one 8-byte value from an already opened descriptor.
 *
 * file  - descriptor to read from; a negative value is rejected up front
 * index - byte offset passed to pread() (the MSR number for an MSR device)
 * data  - receives the value that was read
 *
 * Returns 1 when exactly sizeof *data bytes were read, 0 otherwise.
 */
static int read_msr(int file, unsigned int index, unsigned long long *data)
{
    ssize_t got;

    if (file >= 0) {
        got = pread(file, data, sizeof *data, index);
        return got == sizeof *data;
    }
    return 0;
}
/*
 * Derive the energy scale factor from MSR 0xC0010299.
 *
 * The 5-bit field at bits 12:8 of the register is used as an exponent N and
 * the result is (1/2)^N. Returns 0.0 when the register cannot be read.
 */
static float get_energy_unit()
{
    unsigned long long raw;
    unsigned long long exponent;

    // AMD OSRR: page 139 - MSRC001_0299
    if (read_msr(msr_file, 0xC0010299, &raw)) {
        exponent = (raw >> 8) & 0x1F;
        return pow(0.5, (float)exponent);
    }
    return 0.0;
}
/*
 * Fetch the raw package energy counter from MSR 0xC001029B.
 *
 * Returns the register contents, or 0 when the read fails (a failure is
 * therefore indistinguishable from a counter value of 0).
 */
static unsigned long get_package_energy()
{
    unsigned long long raw = 0;

    // AMD OSRR: page 139 - MSRC001_029B
    return read_msr(msr_file, 0xC001029B, &raw) ? raw : 0;
}
/*
 * Prepare the RAPL package power sensor.
 *
 * Checks for a supported CPU via check_zen(), opens the MSR device of CPU 0
 * read-only and caches the energy scale factor in energy_unit.
 *
 * Returns 1 when the sensor is usable, 0 otherwise. On every failure path the
 * MSR descriptor is closed again and msr_file is reset to -1, so a failed
 * initialisation does not leak a descriptor or leave stale state behind.
 */
int init_rapl(void)
{
    static const char msr_path[] = "/dev/cpu/0/msr";

    /* Check for supported Zen CPU */
    if (!check_zen())
        return 0;

    /* Release a descriptor left over from an earlier call before reopening */
    if (msr_file >= 0) {
        close(msr_file);
        msr_file = -1;
    }

    /* Open MSR file */
    msr_file = open(msr_path, O_RDONLY);
    if (msr_file < 0)
        return 0;

    /* Energy unit: get_energy_unit() returns 0 when the MSR read fails */
    energy_unit = get_energy_unit();
    if (energy_unit == 0) {
        close(msr_file);
        msr_file = -1;
        return 0;
    }

    return 1;
}
/*
 * Measure package power by sampling the energy counter twice.
 *
 * Blocks for MEASUREMENT_TIME seconds between the two samples. The counter
 * difference is scaled by energy_unit and divided by MEASUREMENT_TIME.
 *
 * Returns the new power value, or the most recent previously computed value
 * when either sample could not be read.
 */
float get_rapl_package_power(void)
{
    unsigned long before, after, delta;

    before = get_package_energy();
    usleep(MEASUREMENT_TIME * 1000000);
    after = get_package_energy();

    /* get_package_energy() reports a failed read as 0. Using such a sample
       would produce a wildly wrong figure, so keep the last good value. */
    if (before == 0 || after == 0)
        return package_power;

    /* The energy counter is 32 bits wide; taking the difference modulo 2^32
       keeps the result correct when the counter wraps between the samples. */
    delta = (after - before) & 0xFFFFFFFFUL;
    package_power = delta * energy_unit / MEASUREMENT_TIME;

    return package_power;
}

6
zen3-rapl.h Normal file
View File

@ -0,0 +1,6 @@
#ifndef ZEN3_RAPL_H
#define ZEN3_RAPL_H
#include "board-sensors.h"
/* Template entry for the RAPL package power sensor, copied by value into the
   caller's board sensor array. Because it is `static` in a header, every
   translation unit that includes this file gets its own private copy; the
   include guard keeps it from being defined twice in one translation unit. */
static struct hwmon_avail_sensor rapl_sensor = {"Package power", -1, NULL, NULL};
/* Set up the RAPL sensor. Returns non-zero when it can be used, 0 otherwise. */
int init_rapl(void);
/* Return the package power; the call sleeps while it takes its measurement. */
float get_rapl_package_power(void);
#endif