Reduced number of NVML function calls

This commit is contained in:
moosecrap 2025-02-21 10:59:31 -08:00
parent 46eb773820
commit b7d22ed9ac
5 changed files with 16 additions and 17 deletions

View File

@ -41,7 +41,7 @@ Note that when controlling fans directly through iCX3 they will fall offline fro
```text ```text
Available options: Available options:
--i2c N : Only probe I2C bus N instead of all (may help with stuttering with --watch) --i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)
--gpu N : Control only GPU N instead of all supported cards --gpu N : Control only GPU N instead of all supported cards
--fan SPEED : Set all fans at once to SPEED (see below) --fan SPEED : Set all fans at once to SPEED (see below)
--fanN SPEED : Set fan N (0-3) to SPEED --fanN SPEED : Set fan N (0-3) to SPEED

View File

@ -91,6 +91,7 @@ struct card_info {
int i2c_fd; /* File descriptor for the i2c device file, for re-use */ int i2c_fd; /* File descriptor for the i2c device file, for re-use */
int product_id; /* EVGA internal product ID, as reported by the iCX3 controller */ int product_id; /* EVGA internal product ID, as reported by the iCX3 controller */
unsigned int bar0; /* Address of the card's PCI base address register */ unsigned int bar0; /* Address of the card's PCI base address register */
void *nvml_device; /* Pointer to nvmlDevice_t for use in NVML calls */
}; };
struct gpu_pci_info { struct gpu_pci_info {

View File

@ -24,7 +24,7 @@ char *header_start = "";
char *header_end = ""; char *header_end = "";
static const char helpstring[] = "Available options:\n" static const char helpstring[] = "Available options:\n"
"--i2c N : Only probe I2C bus N instead of all (may help with stuttering with --watch)\n" "--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)\n"
"--gpu N : Control only GPU N instead of all supported cards\n" "--gpu N : Control only GPU N instead of all supported cards\n"
"--fan SPEED : Set all fans at once to SPEED (see below)\n" "--fan SPEED : Set all fans at once to SPEED (see below)\n"
"--fanN SPEED : Set fan N (0-3) to SPEED\n" "--fanN SPEED : Set fan N (0-3) to SPEED\n"
@ -163,6 +163,8 @@ int main (int argc, char **argv)
/* NVML init */ /* NVML init */
#ifdef USE_NVML #ifdef USE_NVML
init_nvml(); init_nvml();
for (int i = 0; i < gpu_count; i++)
get_nvml_handle(&gpus[i]);
#endif #endif
/* PCI init for VRAM/hotspot temps */ /* PCI init for VRAM/hotspot temps */

View File

@ -10,15 +10,14 @@ void init_nvml()
printf("Could not init NVML: %s\n", nvmlErrorString(result)); printf("Could not init NVML: %s\n", nvmlErrorString(result));
} }
int get_nvml_handle(nvmlDevice_t *device, struct card_info *card) void get_nvml_handle(struct card_info *card)
{ {
nvmlReturn_t result; nvmlReturn_t result;
result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, device); result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, card->nvml_device);
if (result != NVML_SUCCESS) { if (result != NVML_SUCCESS) {
printf("Failed to get device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result)); printf("Failed to get NVML device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0; card->nvml_device = NULL;
} }
return 1;
} }
void print_nvml_clock_reason(int compact, struct card_info *card) void print_nvml_clock_reason(int compact, struct card_info *card)
@ -52,12 +51,11 @@ void print_nvml_clock_reason(int compact, struct card_info *card)
unsigned int get_nvml_temp(struct card_info *card) unsigned int get_nvml_temp(struct card_info *card)
{ {
nvmlDevice_t nvml_device; if (card->nvml_device == NULL)
if (!get_nvml_handle(&nvml_device, card))
return 0; return 0;
unsigned int temp; unsigned int temp;
nvmlReturn_t result = nvmlDeviceGetTemperature(nvml_device, NVML_TEMPERATURE_GPU, &temp); nvmlReturn_t result = nvmlDeviceGetTemperature(*(nvmlDevice_t*)(card->nvml_device), NVML_TEMPERATURE_GPU, &temp);
if (result != NVML_SUCCESS) { if (result != NVML_SUCCESS) {
printf("Failed to get temperature for card at %s: %s\n", card->pci_id, nvmlErrorString(result)); printf("Failed to get temperature for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0; return 0;
@ -67,12 +65,11 @@ unsigned int get_nvml_temp(struct card_info *card)
unsigned long long get_nvml_clock_reasons(struct card_info *card) unsigned long long get_nvml_clock_reasons(struct card_info *card)
{ {
nvmlDevice_t nvml_device; if (card->nvml_device == NULL)
if (!get_nvml_handle(&nvml_device, card))
return 0; return 0;
unsigned long long reasons; unsigned long long reasons;
nvmlReturn_t result = nvmlDeviceGetCurrentClocksEventReasons(nvml_device, &reasons) ; nvmlReturn_t result = nvmlDeviceGetCurrentClocksEventReasons(*(nvmlDevice_t*)(card->nvml_device), &reasons) ;
if (result != NVML_SUCCESS) { if (result != NVML_SUCCESS) {
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result)); printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0; return 0;
@ -83,12 +80,11 @@ unsigned long long get_nvml_clock_reasons(struct card_info *card)
unsigned int get_nvml_mem_util(struct card_info *card) unsigned int get_nvml_mem_util(struct card_info *card)
{ {
nvmlDevice_t nvml_device; if (card->nvml_device == NULL)
if (!get_nvml_handle(&nvml_device, card))
return 0; return 0;
nvmlUtilization_t util; nvmlUtilization_t util;
nvmlReturn_t result = nvmlDeviceGetUtilizationRates(nvml_device, &util); nvmlReturn_t result = nvmlDeviceGetUtilizationRates(*(nvmlDevice_t*)(card->nvml_device), &util);
if (result != NVML_SUCCESS) { if (result != NVML_SUCCESS) {
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result)); printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0; return 0;

View File

@ -22,7 +22,7 @@ static struct clock_reason clock_reason_names[] =
}; };
void init_nvml(); void init_nvml();
int get_nvml_handle(nvmlDevice_t *device, struct card_info *card); void get_nvml_handle(struct card_info *card);
void print_nvml_clock_reason(int compact, struct card_info *card); void print_nvml_clock_reason(int compact, struct card_info *card);
unsigned int get_nvml_temp(struct card_info *card); unsigned int get_nvml_temp(struct card_info *card);
unsigned long long get_nvml_clock_reasons(struct card_info *card); unsigned long long get_nvml_clock_reasons(struct card_info *card);