First commit
This commit is contained in:
84
gpus/nvidia.py
Normal file
84
gpus/nvidia.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import subprocess
|
||||
import csv
|
||||
import logging
|
||||
|
||||
def safe_float(value):
    """Convert *value* to ``float``, yielding ``None`` when that fails.

    Any exception raised by ``float()`` (for example for ``None`` or a
    non-numeric string) is deliberately swallowed so callers can pass raw
    text fields straight through without guarding each one.
    """
    try:
        converted = float(value)
    except Exception:
        converted = None
    return converted
|
||||
|
||||
def extract_nvidia_gpu_info():
    """Query ``nvidia-smi`` and return one metrics dict per reported GPU.

    Runs ``/usr/bin/nvidia-smi --query-gpu=<fields>
    --format=csv,noheader,nounits`` and parses the CSV it prints.

    Returns:
        A list of dicts with the keys ``vendor``, ``gpu_id``, ``uuid``,
        ``name``, ``pci_bus``, ``gpu_temp_c``, ``fan_speed_percent``,
        ``power_w``, ``gpu_clock_mhz``, ``mem_clock_mhz``,
        ``gpu_util_percent``, ``mem_util_percent``, ``memory_used_mib`` and
        ``memory_total_mib``. Numeric entries are floats, or ``None`` when
        the reported text cannot be parsed as a number. If the command
        cannot be run, the error is logged and an empty list is returned.
    """
    query_fields = [
        "index", "uuid", "name", "temperature.gpu", "utilization.gpu",
        "utilization.memory", "pstate", "memory.total", "memory.used",
        "fan.speed", "power.draw", "clocks.current.graphics",
        "clocks.current.memory", "pci.bus_id",
    ]
    cmd = [
        "/usr/bin/nvidia-smi",
        f"--query-gpu={','.join(query_fields)}",
        "--format=csv,noheader,nounits",
    ]
    try:
        output = subprocess.check_output(cmd, encoding="utf-8")
    except Exception as e:
        # Best-effort collector: a missing binary or a failing driver must
        # not take the caller down, so report and return nothing.
        logging.error("Error running nvidia-smi: %s", e)
        return []

    # Fields are separated by a comma plus padding; skipinitialspace keeps
    # that padding out of string values such as the UUID and the name.
    reader = csv.reader(output.splitlines(), skipinitialspace=True)
    gpu_list = []

    for row in reader:
        if len(row) != len(query_fields):
            # Blank or malformed line: indexing the missing columns below
            # would raise KeyError, so skip it instead.
            if row:
                logging.warning("Skipping unexpected nvidia-smi row: %r", row)
            continue

        values = dict(zip(query_fields, (cell.strip() for cell in row)))
        mem_total = safe_float(values["memory.total"])
        mem_used = safe_float(values["memory.used"])
        # Only an unknown reading or a zero/unknown total rules out a
        # percentage; 0 used is a valid reading and must yield 0.0.
        if mem_used is not None and mem_total:
            mem_util = mem_used / mem_total * 100
        else:
            mem_util = None

        gpu_list.append({
            "vendor": "nvidia",
            "gpu_id": f'gpu{values["index"]}',
            "uuid": values["uuid"],
            "name": values["name"],
            "pci_bus": values["pci.bus_id"],
            "gpu_temp_c": safe_float(values["temperature.gpu"]),
            "fan_speed_percent": safe_float(values["fan.speed"]),
            "power_w": safe_float(values["power.draw"]),
            "gpu_clock_mhz": safe_float(values["clocks.current.graphics"]),
            "mem_clock_mhz": safe_float(values["clocks.current.memory"]),
            "gpu_util_percent": safe_float(values["utilization.gpu"]),
            "mem_util_percent": mem_util,
            "memory_used_mib": mem_used,
            "memory_total_mib": mem_total,
        })

    return gpu_list
|
||||
|
||||
def extract_nvidia_processes():
    """List the compute processes that ``nvidia-smi`` reports.

    Runs ``/usr/bin/nvidia-smi
    --query-compute-apps=pid,process_name,used_gpu_memory,gpu_uuid
    --format=csv,noheader,nounits`` and parses one process per line.

    Returns:
        A list of dicts with the keys ``vendor``, ``pid`` (int), ``name``,
        ``gpu_id`` (the GPU UUID string), ``gpu_name`` (always ``"N/A"``),
        ``used_memory_mib`` (float, or ``None`` when the reported value is
        not numeric) and ``gpu_util_percent`` (always ``0``). If the
        command cannot be run, the error is logged and an empty list is
        returned.
    """
    cmd = [
        "/usr/bin/nvidia-smi",
        "--query-compute-apps=pid,process_name,used_gpu_memory,gpu_uuid",
        "--format=csv,noheader,nounits",
    ]
    try:
        output = subprocess.check_output(cmd, encoding="utf-8")
    except Exception as e:
        logging.error("Error fetching NVIDIA processes: %s", e)
        return []

    processes = []
    for line in output.splitlines():
        # The pid is the first field and memory/uuid are the last two, so
        # split from both ends: a comma inside the process name then stays
        # part of the name instead of invalidating the whole line.
        pid_text, sep, rest = line.partition(",")
        tail = rest.rsplit(",", 2)
        if not sep or len(tail) != 3:
            continue
        name, mem, uuid = (part.strip() for part in tail)

        try:
            pid = int(pid_text.strip())
        except ValueError:
            # Without a usable pid the entry cannot be attributed; skip it
            # rather than abort the whole listing.
            continue

        processes.append({
            "vendor": "nvidia",
            "pid": pid,
            "name": name,
            "gpu_id": uuid,
            "gpu_name": "N/A",  # Optionally map UUID to name
            # A non-numeric memory reading becomes None instead of raising.
            "used_memory_mib": safe_float(mem),
            "gpu_util_percent": 0,  # NVIDIA does not expose per-process utilization directly
        })

    return processes
|
||||
Reference in New Issue
Block a user