Compare commits

...

4 Commits

Author SHA1 Message Date
5e78a059a6 Fix in fan error handling 2025-02-21 11:32:02 -08:00
20e2683110 Reuse memory mappings 2025-02-21 11:26:17 -08:00
b7d22ed9ac Reduced number of NVML function calls 2025-02-21 10:59:31 -08:00
46eb773820 Readme update 2025-02-17 04:33:35 -08:00
7 changed files with 66 additions and 64 deletions

View File

@ -14,6 +14,8 @@ A supported EVGA 30-series card with iCX3. This includes:
The number of fans supported depends, of course, on your particular model. The number of fans supported depends, of course, on your particular model.
You must have the `i2c-dev` kernel module loaded (run `modprobe i2c-dev` to load it).
Access to the `/dev/i2c` device files, which means either: Access to the `/dev/i2c` device files, which means either:
* Run as root, or * Run as root, or
* Install udev rules to allow user access. If you have the OpenRGB udev rules installed to control the LEDs you already have this set up. * Install udev rules to allow user access. If you have the OpenRGB udev rules installed to control the LEDs you already have this set up.
@ -39,7 +41,7 @@ Note that when controlling fans directly through iCX3 they will fall offline fro
```text ```text
Available options: Available options:
--i2c N : Only probe I2C bus N instead of all (may help with stuttering with --watch) --i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)
--gpu N : Control only GPU N instead of all supported cards --gpu N : Control only GPU N instead of all supported cards
--fan SPEED : Set all fans at once to SPEED (see below) --fan SPEED : Set all fans at once to SPEED (see below)
--fanN SPEED : Set fan N (0-3) to SPEED --fanN SPEED : Set fan N (0-3) to SPEED

View File

@ -91,6 +91,9 @@ struct card_info {
int i2c_fd; /* File descriptor for the i2c device file, for re-use */ int i2c_fd; /* File descriptor for the i2c device file, for re-use */
int product_id; /* EVGA internal product ID, as reported by the iCX3 controller */ int product_id; /* EVGA internal product ID, as reported by the iCX3 controller */
unsigned int bar0; /* Address of the card's PCI base address register */ unsigned int bar0; /* Address of the card's PCI base address register */
void *nvml_device; /* Pointer to nvmlDevice_t for use in NVML calls */
void *vram_addr; /* Memory mapping for GDDR6 temps */
void *hotspot_addr; /* Memory mapping for hotspot temperature */
}; };
struct gpu_pci_info { struct gpu_pci_info {

View File

@ -24,7 +24,7 @@ char *header_start = "";
char *header_end = ""; char *header_end = "";
static const char helpstring[] = "Available options:\n" static const char helpstring[] = "Available options:\n"
"--i2c N : Only probe I2C bus N instead of all (may help with stuttering with --watch)\n" "--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)\n"
"--gpu N : Control only GPU N instead of all supported cards\n" "--gpu N : Control only GPU N instead of all supported cards\n"
"--fan SPEED : Set all fans at once to SPEED (see below)\n" "--fan SPEED : Set all fans at once to SPEED (see below)\n"
"--fanN SPEED : Set fan N (0-3) to SPEED\n" "--fanN SPEED : Set fan N (0-3) to SPEED\n"
@ -163,6 +163,8 @@ int main (int argc, char **argv)
/* NVML init */ /* NVML init */
#ifdef USE_NVML #ifdef USE_NVML
init_nvml(); init_nvml();
for (int i = 0; i < gpu_count; i++)
get_nvml_handle(&gpus[i]);
#endif #endif
/* PCI init for VRAM/hotspot temps */ /* PCI init for VRAM/hotspot temps */

91
gddr6.c
View File

@ -36,70 +36,69 @@ void init_gddr6(struct card_info *card)
card->bar0 = (pci_dev->base_addr[0] & 0xFFFFFFFF); card->bar0 = (pci_dev->base_addr[0] & 0xFFFFFFFF);
pci_cleanup(pacc); pci_cleanup(pacc);
}
float get_vram_temp(struct card_info *card)
{
int fd;
float temp = 0.0;
/* Open our memory mappings */
card->vram_addr = NULL;
card->hotspot_addr = NULL;
int fd;
if ((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1) if ((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1)
{ {
printf("Can't read memory. If you are root, enable kernel parameter iomem=relaxed\n"); printf("Can't read memory for VRAM and Hotspot temperatures. If you are root, enable kernel parameter iomem=relaxed\n");
return 0; return;
} }
unsigned int phys_addr, base_offset;
void *map_base;
for (int i = 0; i < sizeof(device_offset_info) / sizeof(struct device_offset); i++) { for (int i = 0; i < sizeof(device_offset_info) / sizeof(struct device_offset); i++) {
if (card->pci_device_id == device_offset_info[i].device_id){ if (card->pci_device_id == device_offset_info[i].device_id){
unsigned int phys_addr = (card->bar0 + device_offset_info[i].vram_offset); /* Map for VRAM */
unsigned int base_offset = phys_addr & ~(PG_SZ-1); phys_addr = (card->bar0 + device_offset_info[i].vram_offset);
void *map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset); base_offset = phys_addr & ~(PG_SZ-1);
map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
if(map_base == (void *) -1) if(map_base == (void *) -1)
{ printf("Can't map memory for VRAM temperature. If you are root, enable kernel parameter iomem=relaxed\n");
if (fd != -1) else
close(fd); card->vram_addr = (void *) map_base + (phys_addr - base_offset);
printf("Can't read memory for VRAM temperature. If you are root, enable kernel parameter iomem=relaxed\n");
}
void *virt_addr = (char *) map_base + (phys_addr - base_offset); /* Map for hotspot */
int read_result = *((unsigned int *) virt_addr); phys_addr = (card->bar0 + device_offset_info[i].hotspot_offset);
temp = ((read_result & 0x00000fff) / 0x20); base_offset = phys_addr & ~(PG_SZ-1);
munmap(map_base, PG_SZ); map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
if(map_base == (void *) -1)
printf("Can't map memory for Hotspot temperature. If you are root, enable kernel parameter iomem=relaxed\n");
else
card->hotspot_addr = (void *) map_base + (phys_addr - base_offset);
} }
} }
close(fd); close(fd);
}
float get_vram_temp(struct card_info *card)
{
float temp = 0.0;
if(card->vram_addr == NULL)
return 0.0;
int read_result = *((unsigned int *) card->vram_addr);
temp = ((read_result & 0x00000fff) / 0x20);
return temp; return temp;
} }
float get_hotspot_temp(struct card_info *card) float get_hotspot_temp(struct card_info *card)
{ {
int fd;
float temp = 0.0; float temp = 0.0;
if ((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1) if(card->hotspot_addr == NULL)
{ return 0.0;
printf("Can't read memory. If you are root, enable kernel parameter iomem=relaxed\n");
return 0; int read_result = *((unsigned int *) card->hotspot_addr);
} temp = (read_result >> 8) & 0xff;
for (int i = 0; i < sizeof(device_offset_info) / sizeof(struct device_offset); i++) {
if (card->pci_device_id == device_offset_info[i].device_id){
unsigned int phys_addr = (card->bar0 + device_offset_info[i].hotspot_offset);
unsigned int base_offset = phys_addr & ~(PG_SZ-1);
void *map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
if(map_base == (void *) -1)
{
if (fd != -1)
close(fd);
printf("Can't read memory for hotspot. If you are root, enable kernel parameter iomem=relaxed\n");
}
void *virt_addr = (char *) map_base + (phys_addr - base_offset);
int read_result = *((unsigned int *) virt_addr);
temp = (read_result >> 8) & 0xff;
munmap(map_base, PG_SZ);
}
}
close(fd);
return temp; return temp;
} }

2
icx3.c
View File

@ -237,6 +237,6 @@ void set_fan(int fan, char *setting, struct card_info *card)
if (fan_readback.fanmode != fan_control.fanmode || if (fan_readback.fanmode != fan_control.fanmode ||
fan_readback.rpm_offset != fan_control.rpm_offset || fan_readback.rpm_offset != fan_control.rpm_offset ||
fan_readback.duty != fan_control.duty) fan_readback.duty != fan_control.duty)
printf("Error setting fan %d on %s\n", fan, card->i2c_fd); printf("Error setting fan %d on %s\n", fan, card->i2c_dev_path);
} }

View File

@ -10,15 +10,14 @@ void init_nvml()
printf("Could not init NVML: %s\n", nvmlErrorString(result)); printf("Could not init NVML: %s\n", nvmlErrorString(result));
} }
int get_nvml_handle(nvmlDevice_t *device, struct card_info *card) void get_nvml_handle(struct card_info *card)
{ {
nvmlReturn_t result; nvmlReturn_t result;
result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, device); result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, card->nvml_device);
if (result != NVML_SUCCESS) { if (result != NVML_SUCCESS) {
printf("Failed to get device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result)); printf("Failed to get NVML device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0; card->nvml_device = NULL;
} }
return 1;
} }
void print_nvml_clock_reason(int compact, struct card_info *card) void print_nvml_clock_reason(int compact, struct card_info *card)
@ -52,12 +51,11 @@ void print_nvml_clock_reason(int compact, struct card_info *card)
unsigned int get_nvml_temp(struct card_info *card) unsigned int get_nvml_temp(struct card_info *card)
{ {
nvmlDevice_t nvml_device; if (card->nvml_device == NULL)
if (!get_nvml_handle(&nvml_device, card))
return 0; return 0;
unsigned int temp; unsigned int temp;
nvmlReturn_t result = nvmlDeviceGetTemperature(nvml_device, NVML_TEMPERATURE_GPU, &temp); nvmlReturn_t result = nvmlDeviceGetTemperature(*(nvmlDevice_t*)(card->nvml_device), NVML_TEMPERATURE_GPU, &temp);
if (result != NVML_SUCCESS) { if (result != NVML_SUCCESS) {
printf("Failed to get temperature for card at %s: %s\n", card->pci_id, nvmlErrorString(result)); printf("Failed to get temperature for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0; return 0;
@ -67,12 +65,11 @@ unsigned int get_nvml_temp(struct card_info *card)
unsigned long long get_nvml_clock_reasons(struct card_info *card) unsigned long long get_nvml_clock_reasons(struct card_info *card)
{ {
nvmlDevice_t nvml_device; if (card->nvml_device == NULL)
if (!get_nvml_handle(&nvml_device, card))
return 0; return 0;
unsigned long long reasons; unsigned long long reasons;
nvmlReturn_t result = nvmlDeviceGetCurrentClocksEventReasons(nvml_device, &reasons) ; nvmlReturn_t result = nvmlDeviceGetCurrentClocksEventReasons(*(nvmlDevice_t*)(card->nvml_device), &reasons) ;
if (result != NVML_SUCCESS) { if (result != NVML_SUCCESS) {
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result)); printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0; return 0;
@ -83,12 +80,11 @@ unsigned long long get_nvml_clock_reasons(struct card_info *card)
unsigned int get_nvml_mem_util(struct card_info *card) unsigned int get_nvml_mem_util(struct card_info *card)
{ {
nvmlDevice_t nvml_device; if (card->nvml_device == NULL)
if (!get_nvml_handle(&nvml_device, card))
return 0; return 0;
nvmlUtilization_t util; nvmlUtilization_t util;
nvmlReturn_t result = nvmlDeviceGetUtilizationRates(nvml_device, &util); nvmlReturn_t result = nvmlDeviceGetUtilizationRates(*(nvmlDevice_t*)(card->nvml_device), &util);
if (result != NVML_SUCCESS) { if (result != NVML_SUCCESS) {
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result)); printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0; return 0;

View File

@ -22,7 +22,7 @@ static struct clock_reason clock_reason_names[] =
}; };
void init_nvml(); void init_nvml();
int get_nvml_handle(nvmlDevice_t *device, struct card_info *card); void get_nvml_handle(struct card_info *card);
void print_nvml_clock_reason(int compact, struct card_info *card); void print_nvml_clock_reason(int compact, struct card_info *card);
unsigned int get_nvml_temp(struct card_info *card); unsigned int get_nvml_temp(struct card_info *card);
unsigned long long get_nvml_clock_reasons(struct card_info *card); unsigned long long get_nvml_clock_reasons(struct card_info *card);