Added support for some NVML sensors.
GPU temp sensor and clock reasons.
This commit is contained in:
parent
4cb9ef42a6
commit
4c5321bc4e
@ -4,13 +4,20 @@ OBJS = evga-icx.o evga-card.o icx3.o
|
|||||||
LDLIBS = -li2c
|
LDLIBS = -li2c
|
||||||
CFLAGS = -MD
|
CFLAGS = -MD
|
||||||
|
|
||||||
|
ifdef USE_NVML
|
||||||
|
LDLIBS += -lnvidia-ml
|
||||||
|
CFLAGS += -DUSE_NVML
|
||||||
|
OBJS += nvidia-sensors.o
|
||||||
|
endif
|
||||||
|
|
||||||
evga-icx : $(OBJS)
|
evga-icx : $(OBJS)
|
||||||
|
|
||||||
debug : CFLAGS += -g -O0
|
debug : CFLAGS += -g -O0
|
||||||
debug : evga-icx
|
debug : evga-icx
|
||||||
|
|
||||||
clean :
|
clean :
|
||||||
rm evga-icx $(OBJS)
|
rm evga-icx
|
||||||
|
rm *.o
|
||||||
rm *.d
|
rm *.d
|
||||||
|
|
||||||
-include $(OBJS:.o=.d)
|
-include $(OBJS:.o=.d)
|
@ -24,6 +24,11 @@ Access to the `/dev/i2c` device files, which means either:
|
|||||||
## Building
|
## Building
|
||||||
`make`
|
`make`
|
||||||
|
|
||||||
|
## Optional features
|
||||||
|
|
||||||
|
### NVML support
|
||||||
|
Build with the make flag `USE_NVML=1` (for example `make USE_NVML=1`) to also display the main GPU temperature ("GPU1") as reported by the NVIDIA driver, along with the performance cap/clock reason.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
Note that when controlling fans directly through iCX3 they will fall offline from the Nvidia driver and show as 0 RPM until you return them to automatic mode.
|
Note that when controlling fans directly through iCX3 they will fall offline from the Nvidia driver and show as 0 RPM until you return them to automatic mode.
|
||||||
|
|
||||||
|
66
evga-icx.c
66
evga-icx.c
@ -5,6 +5,10 @@
|
|||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef USE_NVML
|
||||||
|
#include "nvidia-sensors.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "icx3.h"
|
#include "icx3.h"
|
||||||
#include "evga-card.h"
|
#include "evga-card.h"
|
||||||
|
|
||||||
@ -32,7 +36,7 @@ int main (int argc, char **argv)
|
|||||||
int print_info = 0;
|
int print_info = 0;
|
||||||
int compact = 0;
|
int compact = 0;
|
||||||
int gpu_num = -1; /* Card to control */
|
int gpu_num = -1; /* Card to control */
|
||||||
int watch = -1;
|
unsigned int watch = 0;
|
||||||
char *fan_speed[ICX3_MAX_FANS] = {NULL};
|
char *fan_speed[ICX3_MAX_FANS] = {NULL};
|
||||||
|
|
||||||
/* Input parsing */
|
/* Input parsing */
|
||||||
@ -125,33 +129,57 @@ int main (int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* NVML init */
|
||||||
|
#ifdef USE_NVML
|
||||||
|
int nvml_ok = init_nvml();
|
||||||
|
nvmlDevice_t* device[MAX_GPUS];
|
||||||
|
#endif
|
||||||
|
|
||||||
/* print sensor info */
|
/* print sensor info */
|
||||||
print:
|
|
||||||
if (print_info) {
|
if (print_info) {
|
||||||
if (gpu_num == -1) {
|
do {
|
||||||
for (int i = 0; i < gpu_count; i++){
|
if (gpu_num == -1) {
|
||||||
print_gpu_info(i, gpus, compact);
|
for (int i = 0; i < gpu_count; i++){
|
||||||
}
|
print_gpu_info(i, &gpus[i], compact);
|
||||||
} else if (gpu_num <= gpu_count - 1) {
|
}
|
||||||
print_gpu_info(gpu_num, gpus, compact);
|
} else if (gpu_num <= gpu_count - 1) {
|
||||||
}
|
print_gpu_info(gpu_num, &gpus[gpu_num], compact);
|
||||||
}
|
}
|
||||||
if (watch > 0) {
|
sleep(watch);
|
||||||
sleep(watch);
|
} while (watch > 0);
|
||||||
goto print;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_NVML
|
||||||
|
nvmlShutdown();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_gpu_info(int gpu_num, struct card_info gpus[], int compact) {
|
void print_gpu_info(int gpu_num, struct card_info *gpu, int compact) {
|
||||||
if (compact) {
|
if (compact) {
|
||||||
|
/* One line per GPU */
|
||||||
printf("#%d ", gpu_num);
|
printf("#%d ", gpu_num);
|
||||||
print_icx3_fans_oneline(&gpus[gpu_num]);
|
print_icx3_fans_oneline(gpu);
|
||||||
print_icx3_temps_oneline(&gpus[gpu_num]);
|
printf(" GPU");
|
||||||
|
#ifdef USE_NVML
|
||||||
|
print_nvml_temp(1, gpu);
|
||||||
|
#endif
|
||||||
|
print_icx3_temps_oneline(gpu);
|
||||||
|
printf("°C");
|
||||||
|
#ifdef USE_NVML
|
||||||
|
print_nvml_clock_reason(1, gpu);
|
||||||
|
#endif
|
||||||
printf("\n");
|
printf("\n");
|
||||||
} else {
|
} else {
|
||||||
printf("#%d: %s (%s) @ %s\n", gpu_num, gpus[gpu_num].card_name, gpus[gpu_num].i2c_dev_path, gpus[gpu_num].pci_id);
|
/* One line per GPU sensor */
|
||||||
print_icx3_fans(&gpus[gpu_num]);
|
printf("#%d: %s (%s) @ %s\n", gpu_num, gpu->card_name, gpu->i2c_dev_path, gpu->pci_id);
|
||||||
print_icx3_temps(&gpus[gpu_num]);
|
print_icx3_fans(gpu);
|
||||||
|
#ifdef USE_NVML
|
||||||
|
print_nvml_temp(0, gpu);
|
||||||
|
#endif
|
||||||
|
print_icx3_temps(gpu);
|
||||||
|
#ifdef USE_NVML
|
||||||
|
print_nvml_clock_reason(0, gpu);
|
||||||
|
#endif
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
4
icx3.c
4
icx3.c
@ -89,11 +89,11 @@ void print_icx3_temps_oneline(struct card_info *card)
|
|||||||
get_temp_sensors(temps, card);
|
get_temp_sensors(temps, card);
|
||||||
|
|
||||||
for (int i=0; i<ICX3_NUM_TEMP_SENSORS; i++) {
|
for (int i=0; i<ICX3_NUM_TEMP_SENSORS; i++) {
|
||||||
if (i == 0 || strncmp(icx3_temp_sensor_names[i], icx3_temp_sensor_names[i-1], 3))
|
/* If this math seems a little jank, it's so we can optionally inject the NVML and vram temps into the oneline */
|
||||||
|
if (i > 0 && strncmp(icx3_temp_sensor_names[i], icx3_temp_sensor_names[i-1], 3))
|
||||||
printf(" %.3s", icx3_temp_sensor_names[i]);
|
printf(" %.3s", icx3_temp_sensor_names[i]);
|
||||||
printf(" %3.0f", temps[i]);
|
printf(" %3.0f", temps[i]);
|
||||||
}
|
}
|
||||||
printf("°C");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_available_fans(char *fans_avail, struct card_info *card)
|
void get_available_fans(char *fans_avail, struct card_info *card)
|
||||||
|
95
nvidia-sensors.c
Normal file
95
nvidia-sensors.c
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "nvidia-sensors.h"
|
||||||
|
|
||||||
|
int init_nvml()
|
||||||
|
{
|
||||||
|
nvmlReturn_t result;
|
||||||
|
result = nvmlInit_v2();
|
||||||
|
if (result != NVML_SUCCESS) {
|
||||||
|
printf("Could not init NVML: %s\n", nvmlErrorString(result));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Print the temperature returned by get_nvml_temp() for the given card.
 *
 * compact: non-zero emits just the 3-wide value (for the one-line layout);
 *          zero emits a complete "GPU1: ..." line terminated by a newline.
 */
void print_nvml_temp(int compact, struct card_info *card)
{
    unsigned int reading = get_nvml_temp(card);

    if (!compact) {
        printf("GPU1: %+d°C\n", reading);
        return;
    }

    printf(" %3d", reading);
}
|
||||||
|
|
||||||
|
void print_nvml_clock_reason(int compact, struct card_info *card)
|
||||||
|
{
|
||||||
|
unsigned long long reasons = get_nvml_clock_reasons(card);
|
||||||
|
int single_reason = 1;
|
||||||
|
|
||||||
|
if (compact)
|
||||||
|
printf(" CLK ");
|
||||||
|
else
|
||||||
|
printf("Clock reasons: ");
|
||||||
|
|
||||||
|
for (int i = 0; i < (sizeof(clock_reason_names) / sizeof(struct clock_reason)); i++) {
|
||||||
|
if (reasons & clock_reason_names[i].mask) {
|
||||||
|
if (!single_reason) {
|
||||||
|
if (compact)
|
||||||
|
printf(",");
|
||||||
|
else
|
||||||
|
printf(", ");
|
||||||
|
}
|
||||||
|
single_reason = 0;
|
||||||
|
|
||||||
|
if (compact)
|
||||||
|
printf("%s", clock_reason_names[i].short_name);
|
||||||
|
else
|
||||||
|
printf("%s", clock_reason_names[i].long_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (single_reason)
|
||||||
|
printf("None");
|
||||||
|
|
||||||
|
if (!compact)
|
||||||
|
printf(" (0x%llx)\n", reasons);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int get_nvml_temp(struct card_info *card)
|
||||||
|
{
|
||||||
|
nvmlReturn_t result;
|
||||||
|
nvmlDevice_t nvml_device;
|
||||||
|
result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, &nvml_device);
|
||||||
|
if (result != NVML_SUCCESS) {
|
||||||
|
printf("Failed to get device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int temp;
|
||||||
|
result = nvmlDeviceGetTemperature(nvml_device, NVML_TEMPERATURE_GPU, &temp);
|
||||||
|
if (result != NVML_SUCCESS) {
|
||||||
|
printf("Failed to get temperature for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long long get_nvml_clock_reasons(struct card_info *card)
|
||||||
|
{
|
||||||
|
nvmlReturn_t result;
|
||||||
|
nvmlDevice_t nvml_device;
|
||||||
|
result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, &nvml_device);
|
||||||
|
if (result != NVML_SUCCESS) {
|
||||||
|
printf("Failed to get device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long long reasons;
|
||||||
|
result = nvmlDeviceGetCurrentClocksEventReasons(nvml_device, &reasons) ;
|
||||||
|
if (result != NVML_SUCCESS) {
|
||||||
|
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return reasons;
|
||||||
|
}
|
||||||
|
|
28
nvidia-sensors.h
Normal file
28
nvidia-sensors.h
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#include <nvml.h>
|
||||||
|
|
||||||
|
#include "evga-card.h"
|
||||||
|
|
||||||
|
/*
 * One entry of the clock-reason lookup table: a bit of the NVML clock reasons
 * mask together with the labels printed for it.
 */
struct clock_reason {
    unsigned long long mask;    /* Bit tested against the reasons bitmask */
    const char *short_name;     /* Label used in the compact one-line output */
    const char *long_name;      /* Label used in the full output */
};
|
||||||
|
|
||||||
|
static struct clock_reason clock_reason_names[] =
|
||||||
|
{
|
||||||
|
{nvmlClocksEventReasonGpuIdle, "Idle", "GPU idle"},
|
||||||
|
{nvmlClocksEventReasonApplicationsClocksSetting, "AppClk", "Application clocks"},
|
||||||
|
{nvmlClocksEventReasonSwPowerCap, "Pwr", "Power cap"},
|
||||||
|
{nvmlClocksThrottleReasonHwSlowdown, "HWSlow", "Hardware slowdown"},
|
||||||
|
{nvmlClocksEventReasonSyncBoost, "Sync", "Sync boost"},
|
||||||
|
{nvmlClocksEventReasonSwThermalSlowdown, "SWTherm", "Software thermal"},
|
||||||
|
{nvmlClocksThrottleReasonHwThermalSlowdown, "HWTherm", "Hardware thermal"},
|
||||||
|
{nvmlClocksThrottleReasonHwPowerBrakeSlowdown, "HWPower", "Hardware power brake"},
|
||||||
|
{nvmlClocksEventReasonDisplayClockSetting, "DispClk", "Display clock"}
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Initialise NVML. Returns 0 if NVML could not be initialised. */
int init_nvml(void);

/*
 * Print the NVML GPU temperature for a card. A non-zero `compact` selects the
 * bare value used in the one-line layout instead of a full "GPU1: ..." line.
 */
void print_nvml_temp(int compact, struct card_info *card);

/*
 * Print the clock reasons currently set for a card. A non-zero `compact`
 * selects the short labels; otherwise the long labels and the raw mask are
 * printed.
 */
void print_nvml_clock_reason(int compact, struct card_info *card);

/*
 * Return the NVML_TEMPERATURE_GPU reading for the card at card->pci_id,
 * or 0 if the device handle or the reading could not be obtained.
 */
unsigned int get_nvml_temp(struct card_info *card);

/*
 * Return the current clock reasons bitmask for the card at card->pci_id,
 * or 0 if the device handle or the mask could not be obtained.
 */
unsigned long long get_nvml_clock_reasons(struct card_info *card);
|
Loading…
x
Reference in New Issue
Block a user