Compare commits

...

4 Commits

Author SHA1 Message Date
5e78a059a6 Fix in fan error handling 2025-02-21 11:32:02 -08:00
20e2683110 Reuse memory mappings 2025-02-21 11:26:17 -08:00
b7d22ed9ac Reduced number of NVML function calls 2025-02-21 10:59:31 -08:00
46eb773820 Readme update 2025-02-17 04:33:35 -08:00
7 changed files with 66 additions and 64 deletions

View File

@ -14,6 +14,8 @@ A supported EVGA 30-series card with iCX3. This includes:
The number of fans supported depends, of course, on your particular model.
You must have the `i2c-dev` kernel module loaded (run `modprobe i2c-dev`).
Access to the `/dev/i2c` device files, which means either:
* Run as root, or
* Install udev rules to allow user access. If you have the OpenRGB udev rules installed to control the LEDs, you already have this set up.
@ -39,7 +41,7 @@ Note that when controlling fans directly through iCX3 they will fall offline fro
```text
Available options:
--i2c N : Only probe I2C bus N instead of all (may help with stuttering with --watch)
--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)
--gpu N : Control only GPU N instead of all supported cards
--fan SPEED : Set all fans at once to SPEED (see below)
--fanN SPEED : Set fan N (0-3) to SPEED

View File

@ -91,6 +91,9 @@ struct card_info {
int i2c_fd; /* File descriptor for the i2c device file, for re-use */
int product_id; /* EVGA internal product ID, as reported by the iCX3 controller */
unsigned int bar0; /* Address of the card's PCI base address register */
void *nvml_device; /* Pointer to nvmlDevice_t for use in NVML calls */
void *vram_addr; /* Memory mapping for GDDR6 temps */
void *hotspot_addr; /* Memory mapping for hotspot temperature */
};
struct gpu_pci_info {

View File

@ -24,7 +24,7 @@ char *header_start = "";
char *header_end = "";
static const char helpstring[] = "Available options:\n"
"--i2c N : Only probe I2C bus N instead of all (may help with stuttering with --watch)\n"
"--i2c N : Only probe I2C bus N instead of all (may help with stuttering or freezing when probing I2C devices)\n"
"--gpu N : Control only GPU N instead of all supported cards\n"
"--fan SPEED : Set all fans at once to SPEED (see below)\n"
"--fanN SPEED : Set fan N (0-3) to SPEED\n"
@ -163,6 +163,8 @@ int main (int argc, char **argv)
/* NVML init */
#ifdef USE_NVML
init_nvml();
for (int i = 0; i < gpu_count; i++)
get_nvml_handle(&gpus[i]);
#endif
/* PCI init for VRAM/hotspot temps */

91
gddr6.c
View File

@ -36,70 +36,69 @@ void init_gddr6(struct card_info *card)
card->bar0 = (pci_dev->base_addr[0] & 0xFFFFFFFF);
pci_cleanup(pacc);
}
float get_vram_temp(struct card_info *card)
{
int fd;
float temp = 0.0;
/* Open our memory mappings */
card->vram_addr = NULL;
card->hotspot_addr = NULL;
int fd;
if ((fd = open("/dev/mem", O_RDWR | O_SYNC)) == -1)
{
printf("Can't read memory. If you are root, enable kernel parameter iomem=relaxed\n");
return 0;
printf("Can't read memory for VRAM and Hotspot temperatures. If you are root, enable kernel parameter iomem=relaxed\n");
return;
}
unsigned int phys_addr, base_offset;
void *map_base;
for (int i = 0; i < sizeof(device_offset_info) / sizeof(struct device_offset); i++) {
if (card->pci_device_id == device_offset_info[i].device_id){
unsigned int phys_addr = (card->bar0 + device_offset_info[i].vram_offset);
unsigned int base_offset = phys_addr & ~(PG_SZ-1);
void *map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
/* Map for VRAM */
phys_addr = (card->bar0 + device_offset_info[i].vram_offset);
base_offset = phys_addr & ~(PG_SZ-1);
map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
if(map_base == (void *) -1)
{
if (fd != -1)
close(fd);
printf("Can't read memory for VRAM temperature. If you are root, enable kernel parameter iomem=relaxed\n");
}
void *virt_addr = (char *) map_base + (phys_addr - base_offset);
int read_result = *((unsigned int *) virt_addr);
temp = ((read_result & 0x00000fff) / 0x20);
munmap(map_base, PG_SZ);
printf("Can't map memory for VRAM temperature. If you are root, enable kernel parameter iomem=relaxed\n");
else
card->vram_addr = (void *) map_base + (phys_addr - base_offset);
/* Map for hotspot */
phys_addr = (card->bar0 + device_offset_info[i].hotspot_offset);
base_offset = phys_addr & ~(PG_SZ-1);
map_base = mmap(0, PG_SZ, PROT_READ, MAP_SHARED, fd, base_offset);
if(map_base == (void *) -1)
printf("Can't map memory for Hotspot temperature. If you are root, enable kernel parameter iomem=relaxed\n");
else
card->hotspot_addr = (void *) map_base + (phys_addr - base_offset);
}
}
close(fd);
}
/*
 * Read the GDDR6 (VRAM) temperature for a card through its cached memory
 * mapping.
 *
 * card: card whose vram_addr field holds the mapped address used for the
 *       VRAM temperature reading, or NULL when no mapping is available.
 *
 * Returns the decoded temperature, or 0.0 when card->vram_addr is NULL.
 */
float get_vram_temp(struct card_info *card)
{
    if (card->vram_addr == NULL)
        return 0.0;

    /*
     * The mapping is kept around and read on every call, so go through a
     * volatile-qualified pointer: each call must perform a real load from the
     * mapped device memory instead of reusing a value the compiler has
     * already seen.
     */
    unsigned int read_result = *((volatile unsigned int *) card->vram_addr);

    /* Low 12 bits carry the reading; the integer division by 0x20 drops the fraction. */
    float temp = ((read_result & 0x00000fff) / 0x20);
    return temp;
}
/*
 * Read the hotspot temperature for a card through its cached memory mapping.
 *
 * card: card whose hotspot_addr field holds the mapped address used for the
 *       hotspot temperature reading, or NULL when no mapping is available.
 *
 * Returns the decoded temperature, or 0.0 when card->hotspot_addr is NULL.
 *
 * This function only reads through the mapping already stored on the card;
 * it does not open /dev/mem or create or release any mapping itself.
 */
float get_hotspot_temp(struct card_info *card)
{
    if (card->hotspot_addr == NULL)
        return 0.0;

    /*
     * The mapping is kept around and read on every call, so go through a
     * volatile-qualified pointer: each call must perform a real load from the
     * mapped device memory instead of reusing a value the compiler has
     * already seen.
     */
    unsigned int read_result = *((volatile unsigned int *) card->hotspot_addr);

    /* The reading is the second-lowest byte (bits 8..15) of the word. */
    float temp = (read_result >> 8) & 0xff;
    return temp;
}

2
icx3.c
View File

@ -237,6 +237,6 @@ void set_fan(int fan, char *setting, struct card_info *card)
if (fan_readback.fanmode != fan_control.fanmode ||
fan_readback.rpm_offset != fan_control.rpm_offset ||
fan_readback.duty != fan_control.duty)
printf("Error setting fan %d on %s\n", fan, card->i2c_fd);
printf("Error setting fan %d on %s\n", fan, card->i2c_dev_path);
}

View File

@ -10,15 +10,14 @@ void init_nvml()
printf("Could not init NVML: %s\n", nvmlErrorString(result));
}
int get_nvml_handle(nvmlDevice_t *device, struct card_info *card)
void get_nvml_handle(struct card_info *card)
{
nvmlReturn_t result;
result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, device);
result = nvmlDeviceGetHandleByPciBusId_v2(card->pci_id, card->nvml_device);
if (result != NVML_SUCCESS) {
printf("Failed to get device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0;
printf("Failed to get NVML device handle for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
card->nvml_device = NULL;
}
return 1;
}
void print_nvml_clock_reason(int compact, struct card_info *card)
@ -52,12 +51,11 @@ void print_nvml_clock_reason(int compact, struct card_info *card)
unsigned int get_nvml_temp(struct card_info *card)
{
nvmlDevice_t nvml_device;
if (!get_nvml_handle(&nvml_device, card))
if (card->nvml_device == NULL)
return 0;
unsigned int temp;
nvmlReturn_t result = nvmlDeviceGetTemperature(nvml_device, NVML_TEMPERATURE_GPU, &temp);
nvmlReturn_t result = nvmlDeviceGetTemperature(*(nvmlDevice_t*)(card->nvml_device), NVML_TEMPERATURE_GPU, &temp);
if (result != NVML_SUCCESS) {
printf("Failed to get temperature for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0;
@ -67,12 +65,11 @@ unsigned int get_nvml_temp(struct card_info *card)
unsigned long long get_nvml_clock_reasons(struct card_info *card)
{
nvmlDevice_t nvml_device;
if (!get_nvml_handle(&nvml_device, card))
if (card->nvml_device == NULL)
return 0;
unsigned long long reasons;
nvmlReturn_t result = nvmlDeviceGetCurrentClocksEventReasons(nvml_device, &reasons) ;
nvmlReturn_t result = nvmlDeviceGetCurrentClocksEventReasons(*(nvmlDevice_t*)(card->nvml_device), &reasons) ;
if (result != NVML_SUCCESS) {
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0;
@ -83,12 +80,11 @@ unsigned long long get_nvml_clock_reasons(struct card_info *card)
unsigned int get_nvml_mem_util(struct card_info *card)
{
nvmlDevice_t nvml_device;
if (!get_nvml_handle(&nvml_device, card))
if (card->nvml_device == NULL)
return 0;
nvmlUtilization_t util;
nvmlReturn_t result = nvmlDeviceGetUtilizationRates(nvml_device, &util);
nvmlReturn_t result = nvmlDeviceGetUtilizationRates(*(nvmlDevice_t*)(card->nvml_device), &util);
if (result != NVML_SUCCESS) {
printf("Failed to get clock reasons for card at %s: %s\n", card->pci_id, nvmlErrorString(result));
return 0;

View File

@ -22,7 +22,7 @@ static struct clock_reason clock_reason_names[] =
};
void init_nvml();
int get_nvml_handle(nvmlDevice_t *device, struct card_info *card);
void get_nvml_handle(struct card_info *card);
void print_nvml_clock_reason(int compact, struct card_info *card);
unsigned int get_nvml_temp(struct card_info *card);
unsigned long long get_nvml_clock_reasons(struct card_info *card);