#include #include #include #include #include #include #ifdef USE_NVML #include "nvidia-sensors.h" #endif #ifdef USE_LIBPCI #include "gddr6.h" #endif #include "icx3.h" #include "evga-card.h" #define MAX_GPUS 16 #define HEADER_COLOR_START "\x1b[36m" #define HEADER_COLOR_END "\x1b[39m" char *header_start = ""; char *header_end = ""; static const char helpstring[] = "Available options:\n" "--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)\n" "--gpu N : Control only GPU N instead of all supported cards\n" "--fan SPEED : Set all fans at once to SPEED (see below)\n" "--fanN SPEED : Set fan N (0-3) to SPEED\n" " SPEED may be one of the following:\n" " 'auto' to return the fan to its default control mode\n" " N to set the fan to that manual % speed\n" " [+/-]N to set that fan to an RPM offset from the GPU-controlled speed\n" "--reset : Reset all fans to their default mode\n" "--sensors : Print sensor readings even if setting a fan speed \n" "--compact : Print sensor reading in a compact one-line per card format\n" "--watch N : Keep printing output every N seconds\n" "--overwrite : Overwrite previously displayed info with --watch and --compact instead of continuously logging\n" "--color : Print headers in color in --compact mode for better readability\n"; void print_gpu_info(int gpu_num, struct card_info gpus[], int compact); int main (int argc, char **argv) { struct card_info gpus[MAX_GPUS]; int gpu_count; int print_info = 0; int compact = 0; int gpu_num = -1; /* Card to control */ int i2c_bus = -1; int overwrite = 0; unsigned int watch = 0; char *fan_speed[ICX3_MAX_FANS] = {NULL}; /* Input parsing */ for (int i = 1; i < argc; i++){ if (strcmp(argv[i], "--i2c") == 0) { i++; if (i < argc) { i2c_bus = atoi(argv[i]); } else { printf(helpstring); return -1; } } else if (strcmp(argv[i], "--gpu") == 0) { i++; if (i < argc) { gpu_num = atoi(argv[i]); } else { printf(helpstring); return -1; } } else if (strcmp(argv[i], "--fan") == 0) { i++; if (i < argc) { for (int j = 0; j < ICX3_MAX_FANS; j++) fan_speed[j] = argv[i]; } else { printf(helpstring); return -1; } } else if (strncmp(argv[i], "--fan", 5) == 0) { int fan_num = atoi(argv[i]+5); i++; if (i < argc) { if (fan_num <= ICX3_MAX_FANS) fan_speed[fan_num] = argv[i]; } else { printf(helpstring); return -1; } } else if (strcmp(argv[i], "--reset") == 0) { for (int j = 0; j < ICX3_MAX_FANS; j++) fan_speed[j] = "auto"; } else if (strcmp(argv[i], "--sensors") == 0) { print_info = 1; } else if (strcmp(argv[i], "--compact") == 0) { compact = 1; } else if (strcmp(argv[i], "--watch") == 0) { i++; if (i < argc) { watch = atoi(argv[i]); } else { printf(helpstring); return -1; } } else if (strcmp(argv[i], "--overwrite") == 0) { overwrite = 1; } else if (strcmp(argv[i], "--color") == 0) { header_start = HEADER_COLOR_START; header_end = HEADER_COLOR_END; } else { printf(helpstring); return 0; } } if (print_info == 0) { /* Check for no fan commands given, so display info by default */ print_info = 1; for (int i = 0; i < ICX3_MAX_FANS; i++) { if (fan_speed[i] != NULL) print_info = 0; } } /* Don't use overwrite mode unless set to compact (we can't tell how many lines the output will be per GPU) */ if (overwrite && !compact) overwrite = 0; gpu_count = find_evga_gpu_i2cs(gpus, MAX_GPUS, i2c_bus); if (gpu_count == -1) { printf("Error scanning I2C devices\n"); return -1; } else if (gpu_count == 0) { printf("No supported GPUs found.\nAre you root or do you have udev access to i2c devices?\nDo you need to run `modprobe i2c-dev`?\n"); return -1; } if (gpu_num > gpu_count - 1) { printf("Invalid GPU number specified (%d, max %d)\n", gpu_num, gpu_count - 1); return -1; } /* execute fan commands */ if (gpu_num == -1) { for (int i = 0; i < gpu_count; i++){ for (int j = 0; j < ICX3_MAX_FANS; j++) { if (fan_speed[j] != NULL) set_fan(j, fan_speed[j], &gpus[i]); } } } else if (gpu_num <= gpu_count - 1) { for (int j = 0; j < ICX3_MAX_FANS; j++) { if (fan_speed[j] != NULL) set_fan(gpu_num, fan_speed[j], &gpus[gpu_num]); } } /* NVML init */ #ifdef USE_NVML init_nvml(); for (int i = 0; i < gpu_count; i++) get_nvml_handle(&gpus[i]); #endif /* PCI init for VRAM/hotspot temps */ #ifdef USE_LIBPCI for (int i = 0; i < gpu_count; i++) init_gddr6(&gpus[i]); #endif /* print sensor info */ if (print_info) { do { if (overwrite) printf("\x1b[K"); /* Clear current console line */ if (gpu_num == -1) { /* No GPU specified on command line, loop over all supported GPUs */ for (int i = 0; i < gpu_count; i++){ if (i > 0) printf("\n"); print_gpu_info(i, &gpus[i], compact); } } else if (gpu_num <= gpu_count - 1) { print_gpu_info(gpu_num, &gpus[gpu_num], compact); } if (!overwrite) printf("\n"); /* Print line break at the end for continuous output */ if (overwrite && compact) { printf("\x1b[1G"); /* Move cursor back to column 1 */ if (gpu_count > 1) printf("\x1b[%dA", gpu_count-1); /* Move cursor back up to the top of gpu list */ } fflush(stdout); sleep(watch); } while (watch > 0); } #ifdef USE_NVML nvmlShutdown(); #endif } void print_gpu_info(int gpu_num, struct card_info *gpu, int compact) { if (compact) { /* One line per GPU */ printf("%s#%d FAN%s", header_start, gpu_num, header_end); print_icx3_fans_oneline(gpu); printf("%s GPU%s", header_start, header_end); #ifdef USE_NVML printf(" %3d", get_nvml_temp(gpu)); #endif float icx_temp_sensors[ICX3_NUM_TEMP_SENSORS] = {}; get_temp_sensors(icx_temp_sensors, gpu); for (int i = 0; i < ICX3_NUM_TEMP_SENSORS; i++) { if (i > 0 && strncmp(icx3_temp_sensor_names[i], icx3_temp_sensor_names[i-1], 3)) printf("%s %.3s%s", header_start, icx3_temp_sensor_names[i], header_end); #ifdef USE_LIBPCI if (strncmp(icx3_temp_sensor_names[i], "MEM1", 4) == 0) printf(" %3.0f", get_vram_temp(gpu)); /* Print the VRAM temp before the rest of the memory sensors */ #endif printf(" %3.0f", icx_temp_sensors[i]); } #ifdef USE_LIBPCI printf("%s HOT%s %3.0f", header_start, header_end, get_hotspot_temp(gpu)); #endif printf("°C "); #ifdef USE_NVML printf("%s MEM %s", header_start, header_end); printf("%3d%%", get_nvml_mem_util(gpu)); printf("%s CLK %s", header_start, header_end); print_nvml_clock_reason(1, gpu); #endif } else { /* One line per GPU sensor */ printf("#%d: %s (%s) @ %s\n", gpu_num, gpu->card_name, gpu->i2c_dev_path, gpu->pci_id); print_icx3_fans(gpu); #ifdef USE_NVML printf("GPU1: %+d°C\n", get_nvml_temp(gpu)); #endif float icx_temp_sensors[ICX3_NUM_TEMP_SENSORS] = {}; get_temp_sensors(icx_temp_sensors, gpu); for (int i = 0; i < ICX3_NUM_TEMP_SENSORS; i++) { #ifdef USE_LIBPCI if (strncmp(icx3_temp_sensor_names[i], "MEM1", 4) == 0) printf("VRAM: +%.0f°C\n", get_vram_temp(gpu)); /* Print the VRAM temp before the rest of the memory sensors */ #endif printf("%s: %+.1f°C\n", icx3_temp_sensor_names[i], icx_temp_sensors[i]); } #ifdef USE_LIBPCI printf("HotSpot: +%.0f°C\n", get_hotspot_temp(gpu)); #endif #ifdef USE_NVML printf("Mem util: %d%%\n", get_nvml_mem_util(gpu)); printf("Clock reasons: "); print_nvml_clock_reason(0, gpu); printf("\n"); #endif } }