Compare commits
17 Commits
Author | SHA1 | Date | |
---|---|---|---|
af2ceec405 | |||
083d296f5b | |||
f7021d7197 | |||
a9f8754922 | |||
c64518531d | |||
6041c13bb9 | |||
7b80a78763 | |||
4bb85c461e | |||
5319945f08 | |||
edd575042e | |||
60caeaf0ef | |||
5e78a059a6 | |||
20e2683110 | |||
b7d22ed9ac | |||
46eb773820 | |||
dc63b57bbe | |||
7cd23384f6 |
6
Makefile
6
Makefile
@ -1,7 +1,7 @@
|
|||||||
.PHONY : clean debug
|
.PHONY : clean debug
|
||||||
|
|
||||||
OBJS = evga-icx.o evga-card.o icx3.o
|
OBJS = evga-icx.o evga-card.o icx3.o board-sensors.o zen3-rapl.o
|
||||||
LDLIBS = -li2c
|
LDLIBS = -li2c -lm
|
||||||
CFLAGS = -MD
|
CFLAGS = -MD
|
||||||
|
|
||||||
ifdef USE_NVML
|
ifdef USE_NVML
|
||||||
@ -18,7 +18,7 @@ endif
|
|||||||
|
|
||||||
evga-icx : $(OBJS)
|
evga-icx : $(OBJS)
|
||||||
|
|
||||||
debug : CFLAGS += -g -O0
|
debug : CFLAGS += -g -Og
|
||||||
debug : evga-icx
|
debug : evga-icx
|
||||||
|
|
||||||
clean :
|
clean :
|
||||||
|
10
README.md
10
README.md
@ -3,7 +3,7 @@
|
|||||||
This program allows you to read temperature sensors off of supported EVGA 30-series iCX3 video cards, as well as control the fans individually.
|
This program allows you to read temperature sensors off of supported EVGA 30-series iCX3 video cards, as well as control the fans individually.
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
A supported EVGA 30-series iCX3 card. I have not done extensive testing but belive this is every model of their:
|
A supported EVGA 30-series card with iCX3. This includes:
|
||||||
* RTX 3060 Ti
|
* RTX 3060 Ti
|
||||||
* RTX 3070
|
* RTX 3070
|
||||||
* RTX 3070 Ti
|
* RTX 3070 Ti
|
||||||
@ -14,6 +14,8 @@ A supported EVGA 30-series iCX3 card. I have not done extensive testing but bel
|
|||||||
|
|
||||||
The number of fans supported depends, of course, on your particular model.
|
The number of fans supported depends, of course, on your particular model.
|
||||||
|
|
||||||
|
You must have the `i2c-dev` kernel module loaded with `modprobe i2c-dev`
|
||||||
|
|
||||||
Access to the `/dev/i2c` device files, which means either:
|
Access to the `/dev/i2c` device files, which means either:
|
||||||
* Run as root, or
|
* Run as root, or
|
||||||
* Install udev rules to allow user access. If you have the OpenRGB udev rules installed to control the LEDs you already have this set up.
|
* Install udev rules to allow user access. If you have the OpenRGB udev rules installed to control the LEDs you already have this set up.
|
||||||
@ -34,11 +36,15 @@ Add the make flag `USE_NVML=1` and the it will also display the main GPU tempera
|
|||||||
### VRAM and Hotspot temperature
|
### VRAM and Hotspot temperature
|
||||||
Add the make flag `USE_LIBPCI=1` and you can also read the VRAM and "hotspot" temperatures. These require direct memory access to the PCI device so you must run as root and also enable the kernel parameter `iomem=relaxed`. These sensors are **extremely** undocumented so I can't say anything about their accuracy.
|
Add the make flag `USE_LIBPCI=1` and you can also read the VRAM and "hotspot" temperatures. These require direct memory access to the PCI device so you must run as root and also enable the kernel parameter `iomem=relaxed`. These sensors are **extremely** undocumented so I can't say anything about their accuracy.
|
||||||
|
|
||||||
|
### Hardware monitoring
|
||||||
|
This program can also monitor hardware sensors using the standard Linux `hwmon` API. See `board-sensors.h` for examples of how to configure these; they must be statically compiled in.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
Note that when controlling fans directly through iCX3 they will fall offline from the Nvidia driver and show as 0 RPM until you return them to automatic mode.
|
Note that when controlling fans directly through iCX3 they will fall offline from the Nvidia driver and show as 0 RPM until you return them to automatic mode.
|
||||||
|
|
||||||
```text
|
```text
|
||||||
Available options:
|
Available options:
|
||||||
|
--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)
|
||||||
--gpu N : Control only GPU N instead of all supported cards
|
--gpu N : Control only GPU N instead of all supported cards
|
||||||
--fan SPEED : Set all fans at once to SPEED (see below)
|
--fan SPEED : Set all fans at once to SPEED (see below)
|
||||||
--fanN SPEED : Set fan N (0-3) to SPEED
|
--fanN SPEED : Set fan N (0-3) to SPEED
|
||||||
@ -52,6 +58,8 @@ Available options:
|
|||||||
--watch N : Keep printing output every N seconds
|
--watch N : Keep printing output every N seconds
|
||||||
--overwrite : Overwrite previously displayed info with --watch and --compact instead of continuously logging new lines
|
--overwrite : Overwrite previously displayed info with --watch and --compact instead of continuously logging new lines
|
||||||
--color : Print headers in color in --compact mode for better readability
|
--color : Print headers in color in --compact mode for better readability
|
||||||
|
--no-reasons : Do not query NVML for clock reasons (can cause stuttering)
|
||||||
|
--board : Also print temperatures from the CPU, motherboard, and other sensors
|
||||||
```
|
```
|
||||||
|
|
||||||
### Examples:
|
### Examples:
|
||||||
|
122
board-sensors.c
Normal file
122
board-sensors.c
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
#include <dirent.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "board-sensors.h"
|
||||||
|
|
||||||
|
int find_board_sensors(struct hwmon_avail_sensor *board_sensors, int max_sensors)
|
||||||
|
{
|
||||||
|
const char *hwmon_path = "/sys/class/hwmon/";
|
||||||
|
char device_path[NAME_MAX];
|
||||||
|
char sensor_path[NAME_MAX];
|
||||||
|
|
||||||
|
char driver_name[256];
|
||||||
|
|
||||||
|
FILE *file;
|
||||||
|
DIR *dir;
|
||||||
|
struct dirent *ent;
|
||||||
|
|
||||||
|
int num_sensors = 0;
|
||||||
|
|
||||||
|
/* Start looking for hwmon devices in /sys/class/hwmon/ */
|
||||||
|
dir = opendir(hwmon_path);
|
||||||
|
|
||||||
|
/* make sure we can open the device directory */
|
||||||
|
if(dir == NULL)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* loop over all hwmon devices */
|
||||||
|
while((ent = readdir(dir)) != NULL)
|
||||||
|
{
|
||||||
|
/* Ignore any non-hwmon dirs */
|
||||||
|
if(strncmp(ent->d_name, "hwmon", 5) != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
strcpy(device_path, hwmon_path);
|
||||||
|
strcat(device_path, ent->d_name);
|
||||||
|
|
||||||
|
/* Read in the name of the device */
|
||||||
|
strcpy(sensor_path, device_path);
|
||||||
|
strcat(sensor_path, "/name");
|
||||||
|
|
||||||
|
file = fopen(sensor_path, "r");
|
||||||
|
if (file == NULL)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (fgets(driver_name, sizeof(driver_name), file) == NULL) {
|
||||||
|
fclose(file);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(file);
|
||||||
|
|
||||||
|
/* Driver names have a linebreak at the end so let's remove that for comparison*/
|
||||||
|
driver_name[strlen(driver_name) - 1] = '\0';
|
||||||
|
|
||||||
|
/* Loop through all supported sensors and see if any are present in this device */
|
||||||
|
for (int i = 0; i < (sizeof(hwmon_sensor_info) / sizeof(struct hwmon_sensor)); i++) {
|
||||||
|
if (strcmp(driver_name, hwmon_sensor_info[i].driver_name) == 0) {
|
||||||
|
/* We matched the driver name, try to open the files */
|
||||||
|
strcpy(sensor_path, device_path);
|
||||||
|
strcat(sensor_path, "/");
|
||||||
|
strcat(sensor_path, hwmon_sensor_info[i].sensor_file_name);
|
||||||
|
strcat(sensor_path, "_input");
|
||||||
|
|
||||||
|
file = fopen(sensor_path, "r");
|
||||||
|
if (file != NULL) {
|
||||||
|
fclose(file);
|
||||||
|
/* Good open of the sensor file */
|
||||||
|
board_sensors[num_sensors].file = calloc(NAME_MAX, sizeof(char));
|
||||||
|
strcpy(board_sensors[num_sensors].file, sensor_path);
|
||||||
|
board_sensors[num_sensors].sort_index = i;
|
||||||
|
board_sensors[num_sensors].sensor_info = &hwmon_sensor_info[i];
|
||||||
|
|
||||||
|
/* Read in the sensor name */
|
||||||
|
board_sensors[num_sensors].sensor_name = calloc(MAX_SENSOR_NAME_LENGTH, sizeof(char));
|
||||||
|
strcpy(sensor_path, device_path);
|
||||||
|
strcat(sensor_path, "/");
|
||||||
|
strcat(sensor_path, hwmon_sensor_info[i].sensor_file_name);
|
||||||
|
strcat(sensor_path, "_label");
|
||||||
|
|
||||||
|
file = fopen(sensor_path, "r");
|
||||||
|
if (file != NULL)
|
||||||
|
fgets(board_sensors[num_sensors].sensor_name, MAX_SENSOR_NAME_LENGTH, file);
|
||||||
|
|
||||||
|
/* Sensor name seems to always have a trailing newline we don't want */
|
||||||
|
size_t len_without_newline = strcspn(board_sensors[num_sensors].sensor_name, "\n");
|
||||||
|
board_sensors[num_sensors].sensor_name[len_without_newline] = '\0';
|
||||||
|
|
||||||
|
if (num_sensors == max_sensors)
|
||||||
|
return num_sensors;
|
||||||
|
num_sensors++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return num_sensors;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns 0 on a bad read or missing sensor, 1 on OK */
|
||||||
|
int get_sensor_reading(struct hwmon_avail_sensor *sensor, float *reading) {
|
||||||
|
char buf[256] = {0};
|
||||||
|
long int raw;
|
||||||
|
FILE *file;
|
||||||
|
|
||||||
|
file = fopen(sensor->file, "r");
|
||||||
|
if (file == NULL)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
fgets(buf, 256, file);
|
||||||
|
raw = strtol(buf, NULL, 10);
|
||||||
|
|
||||||
|
fclose(file);
|
||||||
|
|
||||||
|
*reading = (float)raw / sensor->sensor_info->divisor;
|
||||||
|
|
||||||
|
if (*reading == sensor->sensor_info->bad_value)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
60
board-sensors.h
Normal file
60
board-sensors.h
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
#ifndef BOARD_SENSORS_H
|
||||||
|
#define BOARD_SENSORS_H
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#define MAX_SENSOR_NAME_LENGTH 256
|
||||||
|
|
||||||
|
/* Static description of one supported sensor: which hwmon driver provides it,
   which sysfs file holds it, and how to label and scale it for display. */
struct hwmon_sensor {
|
||||||
|
char *driver_name; /* Contents of /sys/class/hwmon/hwmonX/name */
|
||||||
|
char *sensor_file_name; /* Sysfs file stem to read; "_input" (value) and "_label" (name) are appended to it */
|
||||||
|
char *name_prefix; /* Prefix to attach to temp*_label for clarity */
|
||||||
|
char *short_name; /* 'Category' name when using compact mode */
|
||||||
|
char *units; /* Units string */
|
||||||
|
float divisor; /* Divisor to convert the value read from sysfs to units */
|
||||||
|
float bad_value; /* Value, after dividing by divisor, that indicates a bad (missing) sensor TODO: verify most of these */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Table of supported sensors.
   Columns: driver_name, sensor_file_name, name_prefix, short_name, units, divisor, bad_value.
   Note the order here matters: a sensor's index in this table is used as its sort index,
   which is the order these will be printed in.
   The "zen-rapl" row has no sysfs file; it only reserves a position in the print order
   for the RAPL power reading.
   NOTE(review): the fan divisors presumably scale RPM to a percentage of each fan's
   maximum speed for this particular machine - confirm before reusing them elsewhere. */
|
||||||
|
static struct hwmon_sensor hwmon_sensor_info[] =
|
||||||
|
{
|
||||||
|
{"zenpower", "temp1", "CPU ", "CPU", "°C", 1000.0, -40.0 }, /* Tdie */
|
||||||
|
{"asusec", "temp2", "Motherboard ", "CPU", "°C", 1000.0, -40.0 }, /* CPU */
|
||||||
|
{"zenpower", "temp3", "CPU ", "CCD", "°C", 1000.0, -40.0 }, /* Tccd1 */
|
||||||
|
{"zenpower", "temp4", "CPU ", "CCD", "°C", 1000.0, -40.0 }, /* Tccd2 */
|
||||||
|
{"zenpower", "temp5", "CPU ", "CCD", "°C", 1000.0, -40.0 }, /* Tccd3 */
|
||||||
|
{"zenpower", "temp6", "CPU ", "CCD", "°C", 1000.0, -40.0 }, /* Tccd4 */
|
||||||
|
{"zenpower", "temp7", "CPU ", "CCD", "°C", 1000.0, -40.0 }, /* Tccd5 */
|
||||||
|
{"zenpower", "temp8", "CPU ", "CCD", "°C", 1000.0, -40.0 }, /* Tccd6 */
|
||||||
|
{"zenpower", "temp9", "CPU ", "CCD", "°C", 1000.0, -40.0 }, /* Tccd7 */
|
||||||
|
{"zenpower", "temp10", "CPU ", "CCD", "°C", 1000.0, -40.0 }, /* Tccd8 */
|
||||||
|
{"zen-rapl", "", "CPU ", "POW", " W", 0.0, 0.0 }, /* Zen RAPL placeholder */
|
||||||
|
{"asusec", "temp1", "Motherboard ", "CHIP", "°C", 1000.0, -40.0 }, /* Chipset */
|
||||||
|
{"asusec", "temp5", "Motherboard ", "VRM", "°C", 1000.0, -40.0 }, /* VRM */
|
||||||
|
{"asusec", "temp3", "", "MOBO", "°C", 1000.0, -40.0 }, /* Motherboard */
|
||||||
|
{"asusec", "temp4", "Motherboard ", "SENS", "°C", 1000.0, -40.0 }, /* T_Sensor */
|
||||||
|
{"asusec", "temp6", "Motherboard ", "H2O", "°C", 1000.0, -40.0 }, /* Water_In */
|
||||||
|
{"asusec", "temp7", "Motherboard ", "H2O", "°C", 1000.0, -40.0 }, /* Water_Out */
|
||||||
|
{"nvme", "temp1", "NVMe ", "NVME", "°C", 1000.0, -40.0 }, /* NVME Composite */
|
||||||
|
{"nct6798", "fan2", "CPU fan", "CPU", "%", 15.0, 0.0 }, /* cpu_fan, cpu mid */
|
||||||
|
{"asusec", "fan1", "", "CPU", "%", 15.0, 0.0 }, /* cpu_opt, cpu front */
|
||||||
|
{"nct6798", "fan5", "H amp", "CHA", "%", 12.0, 0.0 }, /* h_amp, front fan */
|
||||||
|
{"nct6798", "fan3", "Chassis 2", "CHA", "%", 12.0, 0.0 }, /* cha2, top front */
|
||||||
|
{"nct6798", "fan1", "Chassis 1", "CHA", "%", 12.0, 0.0 }, /* cha1, top rear */
|
||||||
|
{"nct6798", "fan4", "Chassis 3", "CHA", "%", 13.0, 0.0 }, /* cha3, rear */
|
||||||
|
{"nct6798", "fan6", "AIO pump", "CHA", "%", 12.0, 0.0 }, /* aio_pump? */
|
||||||
|
{"nct6798", "fan7", "W pump", "CHA", "%", 12.0, 0.0 }, /* w_pump+? */
|
||||||
|
{"asusec", "fan2", "", "CHIP", "%", 35.0, 0.0 }, /* chipset */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* One sensor that was actually found on this machine by find_board_sensors().
   The 'sensor_name' and 'file' strings are heap-allocated by find_board_sensors(). */
struct hwmon_avail_sensor {
|
||||||
|
char *sensor_name; /* Sensor name as read from the sysfs "_label" file (empty if there is none) */
|
||||||
|
int sort_index; /* Sort index for order to display in (index into hwmon_sensor_info) */
|
||||||
|
char *file; /* Full path of the sysfs "_input" file to read from */
|
||||||
|
struct hwmon_sensor *sensor_info; /* Associated sensor info struct */
|
||||||
|
};
|
||||||
|
|
||||||
|
int find_board_sensors(struct hwmon_avail_sensor *board_sensors, int max_sensors);
|
||||||
|
int get_sensor_reading(struct hwmon_avail_sensor *sensor, float *reading);
|
||||||
|
|
||||||
|
#endif
|
34
evga-card.c
34
evga-card.c
@ -8,11 +8,12 @@
|
|||||||
|
|
||||||
/* Search all i2c device files for ones are on a PCI device of a supported GPU,
|
/* Search all i2c device files for ones are on a PCI device of a supported GPU,
|
||||||
and respond with the correct iCX3 version information */
|
and respond with the correct iCX3 version information */
|
||||||
int find_evga_gpu_i2cs(struct card_info *infos, int max_gpus)
|
int find_evga_gpu_i2cs(struct card_info *infos, int max_gpus, int i2c_bus)
|
||||||
{
|
{
|
||||||
char i2c_devices_path[NAME_MAX];
|
char i2c_devices_path[PATH_MAX];
|
||||||
char device_path[NAME_MAX];
|
char device_path[PATH_MAX];
|
||||||
char dev_file[NAME_MAX];
|
char dev_file[PATH_MAX];
|
||||||
|
char pci_path[PATH_MAX];
|
||||||
char *pci_addr;
|
char *pci_addr;
|
||||||
|
|
||||||
FILE *test_fd;
|
FILE *test_fd;
|
||||||
@ -20,6 +21,7 @@ int find_evga_gpu_i2cs(struct card_info *infos, int max_gpus)
|
|||||||
struct dirent *ent;
|
struct dirent *ent;
|
||||||
|
|
||||||
int num_gpus = 0;
|
int num_gpus = 0;
|
||||||
|
int current_i2c_bus = -1;
|
||||||
unsigned short pci_vendor, pci_device, pci_subsystem_vendor, pci_subsystem_device = 0;
|
unsigned short pci_vendor, pci_device, pci_subsystem_vendor, pci_subsystem_device = 0;
|
||||||
|
|
||||||
/* Start looking for I2C adapters in /sys/bus/i2c/devices/ */
|
/* Start looking for I2C adapters in /sys/bus/i2c/devices/ */
|
||||||
@ -37,6 +39,14 @@ int find_evga_gpu_i2cs(struct card_info *infos, int max_gpus)
|
|||||||
if(strncmp(ent->d_name, "i2c-", 4) != 0)
|
if(strncmp(ent->d_name, "i2c-", 4) != 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/* Only probe the specific device given (if provided) */
|
||||||
|
if (i2c_bus >= 0) {
|
||||||
|
sscanf(ent->d_name, "i2c-%i", &current_i2c_bus);
|
||||||
|
if (current_i2c_bus != i2c_bus)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
strcpy(device_path, i2c_devices_path);
|
strcpy(device_path, i2c_devices_path);
|
||||||
strcat(device_path, ent->d_name);
|
strcat(device_path, ent->d_name);
|
||||||
|
|
||||||
@ -46,10 +56,14 @@ int find_evga_gpu_i2cs(struct card_info *infos, int max_gpus)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Read the PCI info for the underlying device */
|
/* Read the PCI info for the underlying device */
|
||||||
pci_vendor = read_pci_id(device_path, "/device/vendor");
|
strcat(device_path, "/../");
|
||||||
pci_device = read_pci_id(device_path, "/device/device");
|
if (realpath(device_path, pci_path) == NULL)
|
||||||
pci_subsystem_vendor = read_pci_id(device_path, "/device/subsystem_vendor");
|
continue;
|
||||||
pci_subsystem_device = read_pci_id(device_path, "/device/subsystem_device");
|
|
||||||
|
pci_vendor = read_pci_id(pci_path, "/vendor");
|
||||||
|
pci_device = read_pci_id(pci_path, "/device");
|
||||||
|
pci_subsystem_vendor = read_pci_id(pci_path, "/subsystem_vendor");
|
||||||
|
pci_subsystem_device = read_pci_id(pci_path, "/subsystem_device");
|
||||||
|
|
||||||
/* See if it's a matching device for a supported EVGA card */
|
/* See if it's a matching device for a supported EVGA card */
|
||||||
for (int i = 0; i < (sizeof(evga_pci_ids) / sizeof(struct gpu_pci_info)); i++) {
|
for (int i = 0; i < (sizeof(evga_pci_ids) / sizeof(struct gpu_pci_info)); i++) {
|
||||||
@ -88,7 +102,7 @@ int find_evga_gpu_i2cs(struct card_info *infos, int max_gpus)
|
|||||||
unsigned short read_pci_id(char *device_path, char *field)
|
unsigned short read_pci_id(char *device_path, char *field)
|
||||||
{
|
{
|
||||||
char buf[16];
|
char buf[16];
|
||||||
char file_path[NAME_MAX];
|
char file_path[PATH_MAX];
|
||||||
|
|
||||||
strcpy(file_path, device_path);
|
strcpy(file_path, device_path);
|
||||||
strcat(file_path, field);
|
strcat(file_path, field);
|
||||||
@ -110,7 +124,7 @@ unsigned short read_pci_id(char *device_path, char *field)
|
|||||||
|
|
||||||
char *read_nvidia_pci_address(char *device_path)
|
char *read_nvidia_pci_address(char *device_path)
|
||||||
{
|
{
|
||||||
char file_path[NAME_MAX];
|
char file_path[PATH_MAX];
|
||||||
|
|
||||||
char *ret = calloc(16 + 1, sizeof(char)); /* assuming pci ids could look as large as 00000000:0C:00.0 */
|
char *ret = calloc(16 + 1, sizeof(char)); /* assuming pci ids could look as large as 00000000:0C:00.0 */
|
||||||
|
|
||||||
|
@ -91,6 +91,9 @@ struct card_info {
|
|||||||
int i2c_fd; /* File descriptor for the i2c device file, for re-use */
|
int i2c_fd; /* File descriptor for the i2c device file, for re-use */
|
||||||
int product_id; /* EVGA internal product ID, as reported by the iCX3 controller */
|
int product_id; /* EVGA internal product ID, as reported by the iCX3 controller */
|
||||||
unsigned int bar0; /* Address of the card's PCI base address register */
|
unsigned int bar0; /* Address of the card's PCI base address register */
|
||||||
|
void *nvml_device; /* Pointer to nvmlDevice_t for use in NVML calls */
|
||||||
|
void *vram_addr; /* Memory mapping for GDDR6 temps */
|
||||||
|
void *hotspot_addr; /* Memory mapping for hotspot temperature */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct gpu_pci_info {
|
struct gpu_pci_info {
|
||||||
@ -162,7 +165,7 @@ static struct gpu_pci_info evga_pci_ids[] =
|
|||||||
{"EVGA GeForce RTX 3090 Ti FTW3 Ultra Gaming" , NVIDIA_VEN, NVIDIA_RTX3090TI_DEV, EVGA_SUB_VEN, EVGA_RTX3090TI_FTW3_ULTRA_GAMING_SUB_DEV }
|
{"EVGA GeForce RTX 3090 Ti FTW3 Ultra Gaming" , NVIDIA_VEN, NVIDIA_RTX3090TI_DEV, EVGA_SUB_VEN, EVGA_RTX3090TI_FTW3_ULTRA_GAMING_SUB_DEV }
|
||||||
};
|
};
|
||||||
|
|
||||||
int find_evga_gpu_i2cs(struct card_info *infos, int max_gpus);
|
int find_evga_gpu_i2cs(struct card_info *infos, int max_gpus, int i2c_bus);
|
||||||
unsigned short read_pci_id(char *device_path, char *field);
|
unsigned short read_pci_id(char *device_path, char *field);
|
||||||
char *read_nvidia_pci_address(char *device_path);
|
char *read_nvidia_pci_address(char *device_path);
|
||||||
|
|
||||||
|
173
evga-icx.c
173
evga-icx.c
@ -15,16 +15,24 @@
|
|||||||
|
|
||||||
#include "icx3.h"
|
#include "icx3.h"
|
||||||
#include "evga-card.h"
|
#include "evga-card.h"
|
||||||
|
#include "board-sensors.h"
|
||||||
|
#include "zen3-rapl.h"
|
||||||
|
|
||||||
#define MAX_GPUS 16
|
#define MAX_GPUS 16
|
||||||
|
#define MAX_BOARD_SENSORS 256
|
||||||
#define HEADER_COLOR_START "\x1b[36m"
|
#define HEADER_COLOR_START "\x1b[36m"
|
||||||
#define HEADER_COLOR_END "\x1b[39m"
|
#define HEADER_COLOR_END "\x1b[39m"
|
||||||
|
|
||||||
char *header_start = "";
|
static char *header_start = "";
|
||||||
char *header_end = "";
|
static char *header_end = "";
|
||||||
|
|
||||||
|
static int zen3_rapl_sensor = -1; /* Board sensor number for the RAPL sensor */
|
||||||
|
static int compact = 0; /* Compact one-line per GPU display */
|
||||||
|
static int no_reasons = 0; /* Don't probe or display NVML clock reasons */
|
||||||
|
|
||||||
static const char helpstring[] = "Available options:\n"
|
static const char helpstring[] = "Available options:\n"
|
||||||
"--gpu N : Control only GPU N instead of all supported cards\n"
|
"--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)\n"
|
||||||
|
"--gpu N : Control only GPU N instead of all supported cards\n"
|
||||||
"--fan SPEED : Set all fans at once to SPEED (see below)\n"
|
"--fan SPEED : Set all fans at once to SPEED (see below)\n"
|
||||||
"--fanN SPEED : Set fan N (0-3) to SPEED\n"
|
"--fanN SPEED : Set fan N (0-3) to SPEED\n"
|
||||||
" SPEED may be one of the following:\n"
|
" SPEED may be one of the following:\n"
|
||||||
@ -36,24 +44,37 @@ static const char helpstring[] = "Available options:\n"
|
|||||||
"--compact : Print sensor reading in a compact one-line per card format\n"
|
"--compact : Print sensor reading in a compact one-line per card format\n"
|
||||||
"--watch N : Keep printing output every N seconds\n"
|
"--watch N : Keep printing output every N seconds\n"
|
||||||
"--overwrite : Overwrite previously displayed info with --watch and --compact instead of continuously logging\n"
|
"--overwrite : Overwrite previously displayed info with --watch and --compact instead of continuously logging\n"
|
||||||
"--color : Print headers in color in --compact mode for better readability\n";
|
"--color : Print headers in color in --compact mode for better readability\n"
|
||||||
|
"--no-reasons : Do not query NVML for clock reasons (can cause stuttering)\n"
|
||||||
|
"--board : Also print temperatures from the CPU, motherboard, and other sensors\n";
|
||||||
|
|
||||||
void print_gpu_info(int gpu_num, struct card_info gpus[], int compact);
|
void print_gpu_info(int gpu_num, struct card_info gpus[]);
|
||||||
|
void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors);
|
||||||
|
|
||||||
int main (int argc, char **argv)
|
int main (int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct card_info gpus[MAX_GPUS];
|
struct card_info gpus[MAX_GPUS];
|
||||||
int gpu_count;
|
struct hwmon_avail_sensor board_sensors[MAX_BOARD_SENSORS];
|
||||||
|
int gpu_count, board_sensor_count;
|
||||||
int print_info = 0;
|
int print_info = 0;
|
||||||
int compact = 0;
|
int gpu_num = -1; /* Card to control */
|
||||||
int gpu_num = -1; /* Card to control */
|
int i2c_bus = -1; /* Specific i2c bus to probe instead of all */
|
||||||
int overwrite = 0;
|
int overwrite = 0; /* Overwrite printed console info in compact mode */
|
||||||
unsigned int watch = 0;
|
unsigned int watch = 0; /* Refresh display every this many seconds */
|
||||||
|
int print_board_sensors = 0; /* Print CPU/motherbord/other sensors as well */
|
||||||
char *fan_speed[ICX3_MAX_FANS] = {NULL};
|
char *fan_speed[ICX3_MAX_FANS] = {NULL};
|
||||||
|
|
||||||
/* Input parsing */
|
/* Input parsing */
|
||||||
for (int i = 1; i < argc; i++){
|
for (int i = 1; i < argc; i++){
|
||||||
if (strcmp(argv[i], "--gpu") == 0) {
|
if (strcmp(argv[i], "--i2c") == 0) {
|
||||||
|
i++;
|
||||||
|
if (i < argc) {
|
||||||
|
i2c_bus = atoi(argv[i]);
|
||||||
|
} else {
|
||||||
|
printf(helpstring);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
} else if (strcmp(argv[i], "--gpu") == 0) {
|
||||||
i++;
|
i++;
|
||||||
if (i < argc) {
|
if (i < argc) {
|
||||||
gpu_num = atoi(argv[i]);
|
gpu_num = atoi(argv[i]);
|
||||||
@ -100,6 +121,10 @@ int main (int argc, char **argv)
|
|||||||
} else if (strcmp(argv[i], "--color") == 0) {
|
} else if (strcmp(argv[i], "--color") == 0) {
|
||||||
header_start = HEADER_COLOR_START;
|
header_start = HEADER_COLOR_START;
|
||||||
header_end = HEADER_COLOR_END;
|
header_end = HEADER_COLOR_END;
|
||||||
|
} else if (strcmp(argv[i], "--no-reasons") == 0) {
|
||||||
|
no_reasons = 1;
|
||||||
|
} else if (strcmp(argv[i], "--board") == 0) {
|
||||||
|
print_board_sensors = 1;
|
||||||
} else {
|
} else {
|
||||||
printf(helpstring);
|
printf(helpstring);
|
||||||
return 0;
|
return 0;
|
||||||
@ -119,8 +144,10 @@ int main (int argc, char **argv)
|
|||||||
if (overwrite && !compact)
|
if (overwrite && !compact)
|
||||||
overwrite = 0;
|
overwrite = 0;
|
||||||
|
|
||||||
gpu_count = find_evga_gpu_i2cs(gpus, MAX_GPUS);
|
/* Scan for supported GPUs */
|
||||||
|
gpu_count = find_evga_gpu_i2cs(gpus, MAX_GPUS, i2c_bus);
|
||||||
|
|
||||||
|
/* Check for no GPUs found or other errors */
|
||||||
if (gpu_count == -1) {
|
if (gpu_count == -1) {
|
||||||
printf("Error scanning I2C devices\n");
|
printf("Error scanning I2C devices\n");
|
||||||
return -1;
|
return -1;
|
||||||
@ -128,12 +155,21 @@ int main (int argc, char **argv)
|
|||||||
printf("No supported GPUs found.\nAre you root or do you have udev access to i2c devices?\nDo you need to run `modprobe i2c-dev`?\n");
|
printf("No supported GPUs found.\nAre you root or do you have udev access to i2c devices?\nDo you need to run `modprobe i2c-dev`?\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
/* Check for invalid GPUs */
|
||||||
if (gpu_num > gpu_count - 1) {
|
if (gpu_num > gpu_count - 1) {
|
||||||
printf("Invalid GPU number specified (%d, max %d)\n", gpu_num, gpu_count - 1);
|
printf("Invalid GPU number specified (%d, max %d)\n", gpu_num, gpu_count - 1);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Scan for motherboard/CPU/other sensors */
|
||||||
|
if (print_board_sensors) {
|
||||||
|
board_sensor_count = find_board_sensors(board_sensors, MAX_BOARD_SENSORS);
|
||||||
|
if (init_rapl() && board_sensor_count < MAX_BOARD_SENSORS) {
|
||||||
|
board_sensors[board_sensor_count] = rapl_sensor;
|
||||||
|
zen3_rapl_sensor = board_sensor_count;
|
||||||
|
board_sensor_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* execute fan commands */
|
/* execute fan commands */
|
||||||
if (gpu_num == -1) {
|
if (gpu_num == -1) {
|
||||||
@ -153,6 +189,10 @@ int main (int argc, char **argv)
|
|||||||
/* NVML init */
|
/* NVML init */
|
||||||
#ifdef USE_NVML
|
#ifdef USE_NVML
|
||||||
init_nvml();
|
init_nvml();
|
||||||
|
for (int i = 0; i < gpu_count; i++) {
|
||||||
|
gpus[i].nvml_device = malloc(sizeof(nvmlDevice_t));
|
||||||
|
get_nvml_handle(&gpus[i]);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* PCI init for VRAM/hotspot temps */
|
/* PCI init for VRAM/hotspot temps */
|
||||||
@ -164,17 +204,21 @@ int main (int argc, char **argv)
|
|||||||
/* print sensor info */
|
/* print sensor info */
|
||||||
if (print_info) {
|
if (print_info) {
|
||||||
do {
|
do {
|
||||||
printf("\x1b[K"); /* Clear current console line (really just for overwrite mode) */
|
if (overwrite)
|
||||||
|
printf("\x1b[K"); /* Clear current console line */
|
||||||
|
|
||||||
|
if (print_board_sensors)
|
||||||
|
print_board_info(board_sensors, board_sensor_count);
|
||||||
|
|
||||||
if (gpu_num == -1) {
|
if (gpu_num == -1) {
|
||||||
/* No GPU specified on command line, loop over all supported GPUs */
|
/* No GPU specified on command line, loop over all supported GPUs */
|
||||||
for (int i = 0; i < gpu_count; i++){
|
for (int i = 0; i < gpu_count; i++){
|
||||||
if (i > 0)
|
if (i > 0)
|
||||||
printf("\n");
|
printf("\n");
|
||||||
print_gpu_info(i, &gpus[i], compact);
|
print_gpu_info(i, &gpus[i]);
|
||||||
}
|
}
|
||||||
} else if (gpu_num <= gpu_count - 1) {
|
} else if (gpu_num <= gpu_count - 1) {
|
||||||
print_gpu_info(gpu_num, &gpus[gpu_num], compact);
|
print_gpu_info(gpu_num, &gpus[gpu_num]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!overwrite)
|
if (!overwrite)
|
||||||
@ -182,8 +226,8 @@ int main (int argc, char **argv)
|
|||||||
|
|
||||||
if (overwrite && compact) {
|
if (overwrite && compact) {
|
||||||
printf("\x1b[1G"); /* Move cursor back to column 1 */
|
printf("\x1b[1G"); /* Move cursor back to column 1 */
|
||||||
if (gpu_count > 1)
|
if (gpu_count > 1 || print_board_sensors)
|
||||||
printf("\x1b[%dA", gpu_count-1); /* Move cursor back up to the top of gpu list */
|
printf("\x1b[%dA", gpu_count-1+print_board_sensors); /* Move cursor back up to the top of gpu list */
|
||||||
}
|
}
|
||||||
|
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
@ -196,7 +240,81 @@ int main (int argc, char **argv)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_gpu_info(int gpu_num, struct card_info *gpu, int compact) {
|
void print_board_info(struct hwmon_avail_sensor *board_sensors, int num_sensors)
|
||||||
|
{
|
||||||
|
int printed_sensors = 0;
|
||||||
|
int current_sort_index = 0;
|
||||||
|
float current_reading = 0.0;
|
||||||
|
int good_reading = 0;
|
||||||
|
/* These allow us to 'summarize' units and categories by only printing them when they change */
|
||||||
|
char *last_short_name = NULL;
|
||||||
|
char *last_units = NULL;
|
||||||
|
|
||||||
|
float rapl_power;
|
||||||
|
|
||||||
|
for (int i = 0; i < (sizeof(hwmon_sensor_info) / sizeof(struct hwmon_sensor)); i++) {
|
||||||
|
|
||||||
|
/* Inject our Zen RAPL power reading here */
|
||||||
|
if (strcmp(hwmon_sensor_info[i].driver_name, "zen-rapl") == 0) {
|
||||||
|
board_sensors[zen3_rapl_sensor].sort_index = current_sort_index;
|
||||||
|
board_sensors[zen3_rapl_sensor].sensor_info = &hwmon_sensor_info[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j=0; j < num_sensors; j++) {
|
||||||
|
struct hwmon_avail_sensor *current_sensor = &board_sensors[j];
|
||||||
|
|
||||||
|
/* Loop over all sensors, but only output those with the current sort index so they come out sort of sorted
|
||||||
|
Duplicates (e.g.) multiple NVMe will come out in whatever sort of order the directory listing happened to */
|
||||||
|
if (current_sensor->sort_index == current_sort_index) {
|
||||||
|
printed_sensors++;
|
||||||
|
|
||||||
|
if (j == zen3_rapl_sensor) {
|
||||||
|
good_reading = 1;
|
||||||
|
current_reading = get_rapl_package_power();
|
||||||
|
} else {
|
||||||
|
good_reading = get_sensor_reading(current_sensor, ¤t_reading);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!good_reading)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (compact) {
|
||||||
|
/* Print units if needed */
|
||||||
|
if (last_units != NULL && strcmp(current_sensor->sensor_info->units, last_units))
|
||||||
|
printf("%s", last_units);
|
||||||
|
|
||||||
|
/* Print new section header if needed */
|
||||||
|
if (last_short_name == NULL || strcmp(current_sensor->sensor_info->short_name, last_short_name)) {
|
||||||
|
if (last_short_name != NULL) /* Spacer for all headings not the first one */
|
||||||
|
printf(" ");
|
||||||
|
printf("%s%s%s", header_start, current_sensor->sensor_info->short_name, header_end);
|
||||||
|
}
|
||||||
|
printf(" %3.0f", current_reading);
|
||||||
|
|
||||||
|
last_short_name = current_sensor->sensor_info->short_name;
|
||||||
|
last_units = current_sensor->sensor_info->units;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
printf("%s%s: %+.1f%s\n",
|
||||||
|
current_sensor->sensor_info->name_prefix,
|
||||||
|
current_sensor->sensor_name,
|
||||||
|
current_reading,
|
||||||
|
current_sensor->sensor_info->units);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
current_sort_index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (compact && last_units != NULL)
|
||||||
|
printf("%s", last_units);
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void print_gpu_info(int gpu_num, struct card_info *gpu)
|
||||||
|
{
|
||||||
if (compact) {
|
if (compact) {
|
||||||
/* One line per GPU */
|
/* One line per GPU */
|
||||||
printf("%s#%d FAN%s", header_start, gpu_num, header_end);
|
printf("%s#%d FAN%s", header_start, gpu_num, header_end);
|
||||||
@ -228,8 +346,10 @@ void print_gpu_info(int gpu_num, struct card_info *gpu, int compact) {
|
|||||||
#ifdef USE_NVML
|
#ifdef USE_NVML
|
||||||
printf("%s MEM %s", header_start, header_end);
|
printf("%s MEM %s", header_start, header_end);
|
||||||
printf("%3d%%", get_nvml_mem_util(gpu));
|
printf("%3d%%", get_nvml_mem_util(gpu));
|
||||||
printf("%s CLK %s", header_start, header_end);
|
if (!no_reasons) {
|
||||||
print_nvml_clock_reason(1, gpu);
|
printf("%s CLK %s", header_start, header_end);
|
||||||
|
print_nvml_clock_reason(1, gpu);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -249,8 +369,8 @@ void print_gpu_info(int gpu_num, struct card_info *gpu, int compact) {
|
|||||||
printf("VRAM: +%.0f°C\n", get_vram_temp(gpu)); /* Print the VRAM temp before the rest of the memory sensors */
|
printf("VRAM: +%.0f°C\n", get_vram_temp(gpu)); /* Print the VRAM temp before the rest of the memory sensors */
|
||||||
#endif
|
#endif
|
||||||
printf("%s: %+.1f°C\n",
|
printf("%s: %+.1f°C\n",
|
||||||
icx3_temp_sensor_names[i],
|
icx3_temp_sensor_names[i],
|
||||||
icx_temp_sensors[i]);
|
icx_temp_sensors[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_LIBPCI
|
#ifdef USE_LIBPCI
|
||||||
@ -259,11 +379,12 @@ void print_gpu_info(int gpu_num, struct card_info *gpu, int compact) {
|
|||||||
|
|
||||||
#ifdef USE_NVML
|
#ifdef USE_NVML
|
||||||
printf("Mem util: %d%%\n", get_nvml_mem_util(gpu));
|
printf("Mem util: %d%%\n", get_nvml_mem_util(gpu));
|
||||||
printf("Clock reasons: ");
|
if (!no_reasons) {
|
||||||
print_nvml_clock_reason(0, gpu);
|
printf("Clock reasons: ");
|
||||||
|
print_nvml_clock_reason(0, gpu);
|
||||||
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
85
gddr6.c
85
gddr6.c
@ -36,70 +36,69 @@ void init_gddr6(struct card_info *card)
|
|||||||
card->bar0 = (pci_dev->base_addr[0] & 0xFFFFFFFF);
|
card->bar0 = (pci_dev->base_addr[0] & 0xFFFFFFFF);
|
||||||
|
|
||||||
pci_cleanup(pacc);
|
pci_cleanup(pacc);
|
||||||
}
|
|
||||||
|
|
||||||
float get_vram_temp(struct card_info *card)
|
/* Open our memory mappings */
|
||||||
{
|
card->vram_addr = NULL;
|
||||||
|
card->hotspot_addr = NULL;
|
||||||
|
|
||||||
int fd;
|
int fd;
|
||||||
float temp = 0.0;
|
|
||||||
|
|
||||||
if ((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1)
|
if ((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1)
|
||||||
{
|
{
|
||||||
printf("Can't read memory. If you are root, enable kernel parameter iomem=relaxed\n");
|
printf("Can't read memory for VRAM and Hotspot temperatures. If you are root, enable kernel parameter iomem=relaxed\n");
|
||||||
return 0;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned int phys_addr, base_offset;
|
||||||
|
void *map_base;
|
||||||
|
|
||||||
for (int i = 0; i < sizeof(device_offset_info) / sizeof(struct device_offset); i++) {
|
for (int i = 0; i < sizeof(device_offset_info) / sizeof(struct device_offset); i++) {
|
||||||
if (card->pci_device_id == device_offset_info[i].device_id){
|
if (card->pci_device_id == device_offset_info[i].device_id){
|
||||||
unsigned int phys_addr = (card->bar0 + device_offset_info[i].vram_offset);
|
/* Map for VRAM */
|
||||||
unsigned int base_offset = phys_addr & ~(PG_SZ-1);
|
phys_addr = (card->bar0 + device_offset_info[i].vram_offset);
|
||||||
void *map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
|
base_offset = phys_addr & ~(PG_SZ-1);
|
||||||
|
map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
|
||||||
if(map_base == (void *) -1)
|
if(map_base == (void *) -1)
|
||||||
{
|
printf("Can't map memory for VRAM temperature. If you are root, enable kernel parameter iomem=relaxed\n");
|
||||||
if (fd != -1)
|
else
|
||||||
close(fd);
|
card->vram_addr = (void *) map_base + (phys_addr - base_offset);
|
||||||
printf("Can't read memory for VRAM temperature. If you are root, enable kernel parameter iomem=relaxed\n");
|
|
||||||
}
|
|
||||||
void *virt_addr = (char *) map_base + (phys_addr - base_offset);
|
/* Map for hotspot */
|
||||||
int read_result = *((unsigned int *) virt_addr);
|
phys_addr = (card->bar0 + device_offset_info[i].hotspot_offset);
|
||||||
temp = ((read_result & 0x00000fff) / 0x20);
|
base_offset = phys_addr & ~(PG_SZ-1);
|
||||||
munmap(map_base, PG_SZ);
|
map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
|
||||||
|
if(map_base == (void *) -1)
|
||||||
|
printf("Can't map memory for Hotspot temperature. If you are root, enable kernel parameter iomem=relaxed\n");
|
||||||
|
else
|
||||||
|
card->hotspot_addr = (void *) map_base + (phys_addr - base_offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
close(fd);
|
close(fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
float get_vram_temp(struct card_info *card)
|
||||||
|
{
|
||||||
|
float temp = 0.0;
|
||||||
|
|
||||||
|
if(card->vram_addr == NULL)
|
||||||
|
return 0.0;
|
||||||
|
|
||||||
|
int read_result = *((unsigned int *) card->vram_addr);
|
||||||
|
temp = ((read_result & 0x00000fff) / 0x20);
|
||||||
|
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
float get_hotspot_temp(struct card_info *card)
|
float get_hotspot_temp(struct card_info *card)
|
||||||
{
|
{
|
||||||
int fd;
|
|
||||||
float temp = 0.0;
|
float temp = 0.0;
|
||||||
|
|
||||||
if ((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1)
|
if(card->hotspot_addr == NULL)
|
||||||
{
|
return 0.0;
|
||||||
printf("Can't read memory. If you are root, enable kernel parameter iomem=relaxed\n");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < sizeof(device_offset_info) / sizeof(struct device_offset); i++) {
|
int read_result = *((unsigned int *) card->hotspot_addr);
|
||||||
if (card->pci_device_id == device_offset_info[i].device_id){
|
temp = (read_result >> 8) & 0xff;
|
||||||
unsigned int phys_addr = (card->bar0 + device_offset_info[i].hotspot_offset);
|
|
||||||
unsigned int base_offset = phys_addr & ~(PG_SZ-1);
|
|
||||||
void *map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
|
|
||||||
if(map_base == (void *) -1)
|
|
||||||
{
|
|
||||||
if (fd != -1)
|
|
||||||
close(fd);
|
|
||||||
printf("Can't read memory for hotspot. If you are root, enable kernel parameter iomem=relaxed\n");
|
|
||||||
}
|
|
||||||
void *virt_addr = (char *) map_base + (phys_addr - base_offset);
|
|
||||||
int read_result = *((unsigned int *) virt_addr);
|
|
||||||
temp = (read_result >> 8) & 0xff;
|
|
||||||
munmap(map_base, PG_SZ);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
close(fd);
|
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
2
icx3.c
2
icx3.c
@ -237,6 +237,6 @@ void set_fan(int fan, char *setting, struct card_info *card)
|
|||||||
if (fan_readback.fanmode != fan_control.fanmode ||
|
if (fan_readback.fanmode != fan_control.fanmode ||
|
||||||
fan_readback.rpm_offset != fan_control.rpm_offset ||
|
fan_readback.rpm_offset != fan_control.rpm_offset ||
|
||||||
fan_readback.duty != fan_control.duty)
|
fan_readback.duty != fan_control.duty)
|
||||||
printf("Error setting fan %d on %s\n", fan, card->i2c_fd);
|
printf("Error setting fan %d on %s\n", fan, card->i2c_dev_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
29
icx3.h
29
icx3.h
@ -1,6 +1,7 @@
|
|||||||
#ifndef ICX3_H
|
#ifndef ICX3_H
|
||||||
#define ICX3_H
|
#define ICX3_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
#include "evga-card.h"
|
#include "evga-card.h"
|
||||||
|
|
||||||
#define ICX3_I2C_ADDR 0x2D
|
#define ICX3_I2C_ADDR 0x2D
|
||||||
@ -22,26 +23,26 @@
|
|||||||
#define ICX3_WRITE_DISABLED 0xFE
|
#define ICX3_WRITE_DISABLED 0xFE
|
||||||
|
|
||||||
struct icx3_fan_control {
|
struct icx3_fan_control {
|
||||||
unsigned char length;
|
uint8_t length;
|
||||||
unsigned char fanmode;
|
uint8_t fanmode;
|
||||||
unsigned short rpm_offset;
|
uint16_t rpm_offset;
|
||||||
unsigned char duty;
|
uint8_t duty;
|
||||||
unsigned char duty_status;
|
uint8_t duty_status;
|
||||||
unsigned short rpm_status;
|
uint16_t rpm_status;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct icx3_temp_sensors {
|
struct icx3_temp_sensors {
|
||||||
unsigned char length;
|
uint8_t length;
|
||||||
unsigned char data[18];
|
uint8_t data[18];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct icx3_info {
|
struct icx3_info {
|
||||||
unsigned char length;
|
uint8_t length;
|
||||||
unsigned char reserved;
|
uint8_t reserved;
|
||||||
unsigned char slave_address;
|
uint8_t slave_address;
|
||||||
unsigned char product_id;
|
uint8_t product_id;
|
||||||
unsigned char major_version;
|
uint8_t major_version;
|
||||||
unsigned char minor_version;
|
uint8_t minor_version;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum icx3_product_id {
|
enum icx3_product_id {
|
||||||
|
@ -10,15 +10,14 @@ void init_nvml()
|
|||||||
printf("Could not init NVML: %s\n", nvmlErrorString(result));
|
printf("Could not init NVML: %s\n", nvmlErrorString(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
int get_nvml_handle(nvmlDevice_t *device, struct card_info *card)
|
void get_nvml_handle(struct card_info *card)
|
||||||
{
|
{
|
||||||
nvmlReturn_t result;
|
nvmlReturn_t result;
|
||||||
result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, device);
|
result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, card->nvml_device);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
printf("Failed to get device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
printf("Failed to get NVML device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
||||||
return 0;
|
card->nvml_device = NULL;
|
||||||
}
|
}
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_nvml_clock_reason(int compact, struct card_info *card)
|
void print_nvml_clock_reason(int compact, struct card_info *card)
|
||||||
@ -37,7 +36,7 @@ void print_nvml_clock_reason(int compact, struct card_info *card)
|
|||||||
single_reason = 0;
|
single_reason = 0;
|
||||||
|
|
||||||
if (compact)
|
if (compact)
|
||||||
printf("%s", clock_reason_names[i].short_name);
|
printf("%-15s", clock_reason_names[i].short_name);
|
||||||
else
|
else
|
||||||
printf("%s", clock_reason_names[i].long_name);
|
printf("%s", clock_reason_names[i].long_name);
|
||||||
}
|
}
|
||||||
@ -52,12 +51,11 @@ void print_nvml_clock_reason(int compact, struct card_info *card)
|
|||||||
|
|
||||||
unsigned int get_nvml_temp(struct card_info *card)
|
unsigned int get_nvml_temp(struct card_info *card)
|
||||||
{
|
{
|
||||||
nvmlDevice_t nvml_device;
|
if (card->nvml_device == NULL)
|
||||||
if (!get_nvml_handle(&nvml_device, card))
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
unsigned int temp;
|
unsigned int temp;
|
||||||
nvmlReturn_t result = nvmlDeviceGetTemperature(nvml_device, NVML_TEMPERATURE_GPU, &temp);
|
nvmlReturn_t result = nvmlDeviceGetTemperature(*(nvmlDevice_t*)(card->nvml_device), NVML_TEMPERATURE_GPU, &temp);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
printf("Failed to get temperature for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
printf("Failed to get temperature for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
||||||
return 0;
|
return 0;
|
||||||
@ -67,12 +65,11 @@ unsigned int get_nvml_temp(struct card_info *card)
|
|||||||
|
|
||||||
unsigned long long get_nvml_clock_reasons(struct card_info *card)
|
unsigned long long get_nvml_clock_reasons(struct card_info *card)
|
||||||
{
|
{
|
||||||
nvmlDevice_t nvml_device;
|
if (card->nvml_device == NULL)
|
||||||
if (!get_nvml_handle(&nvml_device, card))
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
unsigned long long reasons;
|
unsigned long long reasons;
|
||||||
nvmlReturn_t result = nvmlDeviceGetCurrentClocksEventReasons(nvml_device, &reasons) ;
|
nvmlReturn_t result = nvmlDeviceGetCurrentClocksEventReasons(*(nvmlDevice_t*)(card->nvml_device), &reasons) ;
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
||||||
return 0;
|
return 0;
|
||||||
@ -83,12 +80,11 @@ unsigned long long get_nvml_clock_reasons(struct card_info *card)
|
|||||||
|
|
||||||
unsigned int get_nvml_mem_util(struct card_info *card)
|
unsigned int get_nvml_mem_util(struct card_info *card)
|
||||||
{
|
{
|
||||||
nvmlDevice_t nvml_device;
|
if (card->nvml_device == NULL)
|
||||||
if (!get_nvml_handle(&nvml_device, card))
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
nvmlUtilization_t util;
|
nvmlUtilization_t util;
|
||||||
nvmlReturn_t result = nvmlDeviceGetUtilizationRates(nvml_device, &util);
|
nvmlReturn_t result = nvmlDeviceGetUtilizationRates(*(nvmlDevice_t*)(card->nvml_device), &util);
|
||||||
if (result != NVML_SUCCESS) {
|
if (result != NVML_SUCCESS) {
|
||||||
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -22,7 +22,7 @@ static struct clock_reason clock_reason_names[] =
|
|||||||
};
|
};
|
||||||
|
|
||||||
void init_nvml();
|
void init_nvml();
|
||||||
int get_nvml_handle(nvmlDevice_t *device, struct card_info *card);
|
void get_nvml_handle(struct card_info *card);
|
||||||
void print_nvml_clock_reason(int compact, struct card_info *card);
|
void print_nvml_clock_reason(int compact, struct card_info *card);
|
||||||
unsigned int get_nvml_temp(struct card_info *card);
|
unsigned int get_nvml_temp(struct card_info *card);
|
||||||
unsigned long long get_nvml_clock_reasons(struct card_info *card);
|
unsigned long long get_nvml_clock_reasons(struct card_info *card);
|
||||||
|
112
zen3-rapl.c
Normal file
112
zen3-rapl.c
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <cpuid.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#include "zen3-rapl.h"
|
||||||
|
|
||||||
|
/* CPUID vendor string that check_zen() requires before accepting the CPU */
#define AMD_STRING "AuthenticAMD"
|
||||||
|
/* Family values accepted by check_zen(); any other family is rejected */
#define ZEN_FAMILY 0x17
|
||||||
|
#define ZEN3_FAMILY 0x19
|
||||||
|
|
||||||
|
/* Sampling window in seconds: get_rapl_package_power() sleeps
   MEASUREMENT_TIME*1000000 microseconds between its two energy reads */
#define MEASUREMENT_TIME 0.25
|
||||||
|
|
||||||
|
/* Scale factor applied to the raw energy-counter difference; assigned by init_rapl() */
static float energy_unit = 0;
|
||||||
|
|
||||||
|
/* Descriptor for the MSR device; stays -1 until init_rapl() opens it */
static int msr_file = -1;
|
||||||
|
|
||||||
|
/* Most recent value computed by get_rapl_package_power(); returned again
   whenever a new sample is rejected */
static float package_power = 0.0;
|
||||||
|
|
||||||
|
static int check_zen()
|
||||||
|
{
|
||||||
|
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0, ext_family;
|
||||||
|
char vendor[13];
|
||||||
|
|
||||||
|
__get_cpuid(0, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
|
memcpy(vendor, &ebx, 4);
|
||||||
|
memcpy(vendor+4, &edx, 4);
|
||||||
|
memcpy(vendor+8, &ecx, 4);
|
||||||
|
vendor[12] = 0;
|
||||||
|
|
||||||
|
if (strcmp(vendor, AMD_STRING) != 0){
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
__get_cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
|
ext_family = ((eax >> 8) & 0xF) + ((eax >> 20) & 0xFF);
|
||||||
|
if (ext_family != ZEN_FAMILY && ext_family != ZEN3_FAMILY){
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read one 8-byte value from an open descriptor at byte offset `index`.
 *
 * file  - descriptor to read from; any negative value is rejected up front
 * index - offset handed to pread()
 * data  - receives the bytes that were read
 *
 * Returns 1 when pread() delivered exactly sizeof *data bytes, otherwise 0
 * (negative descriptor, read error, or short read).
 */
static int read_msr(int file, unsigned int index, unsigned long long *data)
{
    int ok = 0;

    if (file >= 0)
        ok = (pread(file, data, sizeof *data, index) == sizeof *data);

    return ok;
}
|
||||||
|
|
||||||
|
static float get_energy_unit()
|
||||||
|
{
|
||||||
|
unsigned long long data;
|
||||||
|
// AMD OSRR: page 139 - MSRC001_0299
|
||||||
|
if (!read_msr(msr_file, 0xC0010299, &data))
|
||||||
|
return 0.0;
|
||||||
|
|
||||||
|
return pow(1.0/2.0, (float)((data >> 8) & 0x1F));
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned long get_package_energy()
|
||||||
|
{
|
||||||
|
unsigned long long data;
|
||||||
|
// AMD OSRR: page 139 - MSRC001_029B
|
||||||
|
if (!read_msr(msr_file, 0xC001029B, &data))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
int init_rapl()
|
||||||
|
{
|
||||||
|
/* Check for supported Zen CPU */
|
||||||
|
if (!check_zen())
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Open MSR file */
|
||||||
|
char *msr_path = "/dev/cpu/0/msr";
|
||||||
|
msr_file = open(msr_path, O_RDONLY);
|
||||||
|
if (msr_file < 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Energy unit */
|
||||||
|
energy_unit = get_energy_unit();
|
||||||
|
if (energy_unit == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
float get_rapl_package_power()
|
||||||
|
{
|
||||||
|
unsigned long package_eng_b = 0;
|
||||||
|
unsigned long package_eng_a = 0;
|
||||||
|
|
||||||
|
package_eng_b = get_package_energy();
|
||||||
|
|
||||||
|
usleep(MEASUREMENT_TIME*1000000);
|
||||||
|
|
||||||
|
package_eng_a = get_package_energy();
|
||||||
|
|
||||||
|
/* Only update if we computed a good power reading, otherwise return the most recent measurement */
|
||||||
|
if (package_eng_a >= package_eng_b)
|
||||||
|
package_power = (package_eng_a - package_eng_b) * energy_unit / MEASUREMENT_TIME;
|
||||||
|
|
||||||
|
return package_power;
|
||||||
|
}
|
6
zen3-rapl.h
Normal file
6
zen3-rapl.h
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#ifndef ZEN3_RAPL_H
#define ZEN3_RAPL_H

#include "board-sensors.h"

/*
 * Sensor descriptor for the package power reading. Because it is `static`,
 * every translation unit that includes this header gets its own copy.
 * NOTE(review): the meaning of the -1 and NULL initialisers depends on
 * struct hwmon_avail_sensor in board-sensors.h - confirm there.
 */
static struct hwmon_avail_sensor rapl_sensor = {"Package power", -1, NULL, NULL};

/* Set up power measurement; returns 1 on success, 0 on failure */
int init_rapl();

/* Measure and return the current package power; blocks while sampling */
float get_rapl_package_power();

#endif /* ZEN3_RAPL_H */
|
Loading…
x
Reference in New Issue
Block a user