diff --git a/Makefile b/Makefile
index 641aeb4..63170e7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 .PHONY : clean debug
 
-OBJS = evga-icx.o evga-card.o icx3.o board-sensors.o
-LDLIBS = -li2c
+OBJS = evga-icx.o evga-card.o icx3.o board-sensors.o zen3-rapl.o
+LDLIBS = -li2c -lm
 CFLAGS = -MD
 
 ifdef USE_NVML
diff --git a/README.md b/README.md
index 7ff5f76..7fad8a3 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,7 @@ Available options:
 --overwrite : Overwrite previously displayed info with --watch and --compact instead of continuously logging new lines
 --color : Print headers in color in --compact mode for better readability
 --no-reasons : Do not query NVML for clock reasons (can cause stuttering)
+--board : Also print temperatures from the CPU, motherboard, and other sensors
 ```
 
 ### Examples:
diff --git a/board-sensors.h b/board-sensors.h
index 4ebd4e6..7ac21aa 100644
--- a/board-sensors.h
+++ b/board-sensors.h
@@ -1,3 +1,6 @@
+#ifndef BOARD_SENSORS_H
+#define BOARD_SENSORS_H
+
 #include
 
 #define MAX_SENSOR_NAME_LENGTH 256
@@ -25,6 +28,7 @@ static struct hwmon_sensor hwmon_sensor_info[] =
     {"zenpower", "temp8", "CPU ", "CCD", "°C", 1000.0, -40.0}, /* Tccd6 */
     {"zenpower", "temp9", "CPU ", "CCD", "°C", 1000.0, -40.0}, /* Tccd7 */
     {"zenpower", "temp10", "CPU ", "CCD", "°C", 1000.0, -40.0}, /* Tccd8 */
+    {"zen-rapl", "", "CPU ", "POW", " W", 0.0, 0.0}, /* Zen RAPL placeholder */
     {"asusec", "temp1", "Motherboard ", "CHIP", "°C", 1000.0, -40.0}, /* Chipset */
     {"asusec", "temp5", "Motherboard ", "VRM", "°C", 1000.0, -40.0}, /* VRM */
     {"asusec", "temp3", "", "MOBO", "°C", 1000.0, -40.0}, /* Motherboard */
@@ -42,4 +46,6 @@ struct hwmon_avail_sensor {
 };
 
 int find_board_sensors(struct hwmon_avail_sensor *board_sensors, int max_sensors);
-int get_sensor_reading(struct hwmon_avail_sensor *sensor, float *reading);
\ No newline at end of file
+int get_sensor_reading(struct hwmon_avail_sensor *sensor, float *reading);
+
+#endif
diff --git a/evga-icx.c b/evga-icx.c
index cfa8442..3fd3084 100644
--- a/evga-icx.c
+++ b/evga-icx.c
@@ -16,14 +16,19 @@
 #include "icx3.h"
 #include "evga-card.h"
 #include "board-sensors.h"
+#include "zen3-rapl.h"
 
 #define MAX_GPUS 16
 #define MAX_BOARD_SENSORS 256
 #define HEADER_COLOR_START "\x1b[36m"
 #define HEADER_COLOR_END "\x1b[39m"
 
-char *header_start = "";
-char *header_end = "";
+static char *header_start = "";
+static char *header_end = "";
+
+static int zen3_rapl_sensor = -1; /* Board sensor number for the RAPL sensor */
+static int compact = 0; /* Compact one-line per GPU display */
+static int no_reasons = 0; /* Don't probe or display NVML clock reasons */
 
 static const char helpstring[] = "Available options:\n"
     "--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)\n"
@@ -43,20 +48,18 @@ static const char helpstring[] = "Available options:\n"
     "--no-reasons : Do not query NVML for clock reasons (can cause stuttering)\n"
     "--board : Also print temperatures from the CPU, motherboard, and other sensors";
 
-void print_gpu_info(int gpu_num, struct card_info gpus[], int compact, int no_reasons);
-void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors, int compact);
+void print_gpu_info(int gpu_num, struct card_info gpus[]);
+void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors);
 
 int main (int argc, char **argv)
 {
     struct card_info gpus[MAX_GPUS];
     struct hwmon_avail_sensor board_sensors[MAX_BOARD_SENSORS];
     int gpu_count, board_sensor_count;
-    int print_info = 0;
-    int compact = 0;
+    int print_info = 0;
     int gpu_num = -1; /* Card to control */
     int i2c_bus = -1; /* Specific i2c bus to probe instead of all */
-    int overwrite = 0; /* Overwrite printed console info in compact mode */
-    int no_reasons = 0; /* Don't probe or display NVML clock reasons */
+    int overwrite = 0; /* Overwrite printed console info in compact mode */
     unsigned int watch = 0; /* Refresh display every this many seconds */
     int print_board_sensors = 0; /* Print CPU/motherbord/other sensors as well */
     char *fan_speed[ICX3_MAX_FANS] = {NULL};
@@ -159,8 +162,14 @@ int main (int argc, char **argv)
     }
 
     /* Scan for motherboard/CPU/other sensors */
-    if (print_board_sensors)
+    if (print_board_sensors) {
         board_sensor_count = find_board_sensors(board_sensors, MAX_BOARD_SENSORS);
+        if (init_rapl() && board_sensor_count < MAX_BOARD_SENSORS) {
+            board_sensors[board_sensor_count] = rapl_sensor;
+            zen3_rapl_sensor = board_sensor_count;
+            board_sensor_count++;
+        }
+    }
 
     /* execute fan commands */
     if (gpu_num == -1) {
@@ -199,17 +208,17 @@ int main (int argc, char **argv)
             printf("\x1b[K"); /* Clear current console line */
 
         if (print_board_sensors)
-            print_board_info(board_sensors, board_sensor_count, compact);
+            print_board_info(board_sensors, board_sensor_count);
 
         if (gpu_num == -1) {
             /* No GPU specified on command line, loop over all supported GPUs */
             for (int i = 0; i < gpu_count; i++){
                 if (i > 0)
                     printf("\n");
-                print_gpu_info(i, &gpus[i], compact, no_reasons);
+                print_gpu_info(i, &gpus[i]);
             }
         } else if (gpu_num <= gpu_count - 1) {
-            print_gpu_info(gpu_num, &gpus[gpu_num], compact, no_reasons);
+            print_gpu_info(gpu_num, &gpus[gpu_num]);
         }
 
         if (!overwrite)
@@ -231,7 +240,7 @@ int main (int argc, char **argv)
 #endif
 }
 
-void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors, int compact)
+void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors)
 {
     int printed_sensors = 0;
     int current_sort_index = 0;
@@ -241,39 +250,54 @@ void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors,
     char *last_short_name = NULL;
     char *last_units = NULL;
 
-    while (printed_sensors < num_sensors) {
-        for (int i=0; i < num_sensors; i++){
+    for (int i = 0; i < (sizeof(hwmon_sensor_info) / sizeof(struct hwmon_sensor)); i++) {
+
+        /* Inject our Zen RAPL power reading here; zen3_rapl_sensor stays -1 unless main() registered the sensor */
+        if (zen3_rapl_sensor >= 0 && strcmp(hwmon_sensor_info[i].driver_name, "zen-rapl") == 0) {
+            board_sensors[zen3_rapl_sensor].sort_index = current_sort_index;
+            board_sensors[zen3_rapl_sensor].sensor_info = &hwmon_sensor_info[i];
+        }
+
+        for (int j=0; j < num_sensors; j++) {
+            struct hwmon_avail_sensor *current_sensor = &board_sensors[j];
+
             /* Loop over all sensors, but only output those with the current sort index so they come out sort of sorted
                Duplicates (e.g.) multiple NVMe will come out in whatever sort of order the directory listing happened to */
-            if (board_sensors[i].sort_index == current_sort_index) {
+            if (current_sensor->sort_index == current_sort_index) {
                 printed_sensors++;
-                good_reading = get_sensor_reading(&board_sensors[i], &current_reading);
+
+                if (j == zen3_rapl_sensor) {
+                    good_reading = 1;
+                    current_reading = get_rapl_package_power();
+                } else {
+                    good_reading = get_sensor_reading(current_sensor, &current_reading);
+                }
 
                 if (!good_reading)
                     continue;
 
                 if (compact) {
                     /* Print units if needed */
-                    if (last_units != NULL && strcmp(board_sensors[i].sensor_info->units, last_units))
+                    if (last_units != NULL && strcmp(current_sensor->sensor_info->units, last_units))
                         printf("%s", last_units);
 
                     /* Print new section header if needed */
-                    if (last_short_name == NULL || strcmp(board_sensors[i].sensor_info->short_name, last_short_name)) {
+                    if (last_short_name == NULL || strcmp(current_sensor->sensor_info->short_name, last_short_name)) {
                         if (last_short_name != NULL) /* Spacer for all headings not the first one */
                             printf(" ");
 
-                        printf("%s%s%s", header_start, board_sensors[i].sensor_info->short_name, header_end);
+                        printf("%s%s%s", header_start, current_sensor->sensor_info->short_name, header_end);
                     }
 
                     printf(" %3.0f", current_reading);
-                    last_short_name = board_sensors[i].sensor_info->short_name;
-                    last_units = board_sensors[i].sensor_info->units;
+                    last_short_name = current_sensor->sensor_info->short_name;
+                    last_units = current_sensor->sensor_info->units;
                 } else {
                     printf("%s%s: %+.1f%s\n",
-                        board_sensors[i].sensor_info->name_prefix,
-                        board_sensors[i].sensor_name,
+                        current_sensor->sensor_info->name_prefix,
+                        current_sensor->sensor_name,
                         current_reading,
-                        board_sensors[i].sensor_info->units);
+                        current_sensor->sensor_info->units);
                 }
             }
 
@@ -287,7 +311,7 @@ void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors,
     printf("\n");
 }
 
-void print_gpu_info(int gpu_num, struct card_info *gpu, int compact, int no_reasons)
+void print_gpu_info(int gpu_num, struct card_info *gpu)
 {
     if (compact) {
         /* One line per GPU */
@@ -360,6 +384,5 @@ void print_gpu_info(int gpu_num, struct card_info *gpu, int compact, int no_reas
         printf("\n");
 #endif
     }
-
 }
 
diff --git a/zen3-rapl.c b/zen3-rapl.c
new file mode 100644
index 0000000..8f71760
--- /dev/null
+++ b/zen3-rapl.c
@@ -0,0 +1,132 @@
+#include <cpuid.h>
+#include <fcntl.h>
+#include <math.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "zen3-rapl.h"
+
+#define AMD_STRING "AuthenticAMD"
+#define ZEN_FAMILY 0x17
+#define ZEN3_FAMILY 0x19
+
+/* Seconds slept between the two energy counter samples */
+#define MEASUREMENT_TIME 0.25
+
+/* Energy per counter tick, read once by init_rapl() */
+static float energy_unit = 0;
+
+/* Descriptor for the MSR device, -1 while it is not open */
+static int msr_file = -1;
+
+/* Last successfully computed package power */
+static float package_power = 0.0;
+
+/* Return 1 when CPUID reports an AMD CPU of family 17h or 19h, 0 otherwise */
+static int check_zen(void)
+{
+    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0, ext_family;
+    char vendor[13];
+
+    /* Leaf 0: the vendor string is assembled from EBX, EDX, ECX in that order */
+    if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
+        return 0;
+
+    memcpy(vendor, &ebx, 4);
+    memcpy(vendor+4, &edx, 4);
+    memcpy(vendor+8, &ecx, 4);
+    vendor[12] = 0;
+
+    if (strcmp(vendor, AMD_STRING) != 0){
+        return 0;
+    }
+
+    /* Leaf 1: family is EAX[11:8] plus EAX[27:20] */
+    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+        return 0;
+
+    ext_family = ((eax >> 8) & 0xF) + ((eax >> 20) & 0xFF);
+    if (ext_family != ZEN_FAMILY && ext_family != ZEN3_FAMILY){
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Read one 64-bit MSR through the open msr device; returns 1 on success, 0 on failure */
+static int read_msr(int file, unsigned int index, unsigned long long *data)
+{
+    if (file < 0)
+        return 0;
+
+    /* The register number is passed as the read offset */
+    return pread(file, data, sizeof *data, (off_t)index) == (ssize_t)sizeof *data;
+}
+
+/* Counter resolution from MSR C001_0299: 1/2^n with n = bits 12:8; 0.0 if the read fails */
+static float get_energy_unit(void)
+{
+    unsigned long long data;
+    // AMD OSRR: page 139 - MSRC001_0299
+    if (!read_msr(msr_file, 0xC0010299, &data))
+        return 0.0;
+
+    return pow(1.0/2.0, (float)((data >> 8) & 0x1F));
+}
+
+/* Raw package energy counter from MSR C001_029B; 0 if the read fails */
+static unsigned long long get_package_energy(void)
+{
+    unsigned long long data;
+    // AMD OSRR: page 139 - MSRC001_029B
+    if (!read_msr(msr_file, 0xC001029B, &data))
+        return 0;
+
+    return data;
+}
+
+/* Probe for a supported CPU and a readable MSR device; returns 1 when power readings are available */
+int init_rapl(void)
+{
+    /* Check for supported Zen CPU */
+    if (!check_zen())
+        return 0;
+
+    /* Open MSR file */
+    const char *msr_path = "/dev/cpu/0/msr";
+    msr_file = open(msr_path, O_RDONLY);
+    if (msr_file < 0)
+        return 0;
+
+    /* Energy unit */
+    energy_unit = get_energy_unit();
+    if (energy_unit == 0) {
+        /* Unusable MSR: release the descriptor instead of leaking it */
+        close(msr_file);
+        msr_file = -1;
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Sample the package energy counter twice, MEASUREMENT_TIME apart (blocks for that long),
+   and return the average power over the interval */
+float get_rapl_package_power(void)
+{
+    unsigned long long package_eng_b = 0;
+    unsigned long long package_eng_a = 0;
+
+    package_eng_b = get_package_energy();
+
+    usleep(MEASUREMENT_TIME*1000000);
+
+    package_eng_a = get_package_energy();
+
+    /* Only update if we computed a good power reading, otherwise return the most recent measurement */
+    if (package_eng_a >= package_eng_b)
+        package_power = (package_eng_a - package_eng_b) * energy_unit / MEASUREMENT_TIME;
+
+    return package_power;
+}
diff --git a/zen3-rapl.h b/zen3-rapl.h
new file mode 100644
index 0000000..efe25fe
--- /dev/null
+++ b/zen3-rapl.h
@@ -0,0 +1,15 @@
+#ifndef ZEN3_RAPL_H
+#define ZEN3_RAPL_H
+
+#include "board-sensors.h"
+
+/* Template entry that the caller copies into its sensor list once init_rapl() succeeds */
+static struct hwmon_avail_sensor rapl_sensor = {"Package power", -1, NULL, NULL};
+
+/* Returns 1 when package power readings are available, 0 otherwise */
+int init_rapl(void);
+
+/* Blocks for the sampling interval and returns the package power */
+float get_rapl_package_power(void);
+
+#endif