1 #include <ObSensorGpu.h>
20 if (
type() == SensorType::VALUE ||
type() == SensorType::SPEED) {
21 _logger->info(
"Searching for GPU ...");
23 if (nvmlInit() == NVML_SUCCESS) {
24 nvmlDeviceGetCount(&
_count);
29 char *devName =
static_cast<char *
>(malloc(64 *
sizeof(
char)));
31 for (
unsigned int i = 0; i <
_count; i++) {
32 nvmlDeviceGetHandleByIndex(i, &dev);
33 nvmlDeviceGetName(dev, devName, 64);
34 _logger->debug(
"GPU {} : {}", i, devName);
45 _logger->warn(
"No GPU cards found");
69 _logger->trace(
"ObSensorGpu::Doing process '{}' with count {} ...",
_name,
72 for (
unsigned int i = 0; i <
_count; i++) {
73 _logger->trace(
"ObSensorGpu::Getting info from GPU {}", i);
77 unsigned int count = 0;
78 nvmlDeviceGetComputeRunningProcesses(
fDevices[i], &count,
nullptr);
80 nvmlDeviceGetComputeRunningProcesses(
fDevices[i], &count,
83 nvmlDeviceGetGraphicsRunningProcesses(
fDevices[i], &count,
nullptr);
85 nvmlDeviceGetGraphicsRunningProcesses(
fDevices[i], &count,
107 using namespace fmt::literals;
109 std::string
json =
"";
110 _logger->trace(
"ObSensorGpu::JSON name={} count={}...", name,
_count);
114 json += fmt::format(R
"("{}": {{ "gpus" : [)", name);
116 for (
unsigned int i = 0; i <
_count; i++) {
122 R
"("id": {id}, "name": "{name}",)"
125 R"("free": {{ "value": {freeV}, "alpha": {freeA:.2f} }},)"
126 R"("used": {{ "value": {usedV}, "alpha": {usedA:.2f} }},)"
127 R"("total": {{ "value": {totalV}, "alpha": 1.0 }})"
131 "id"_a = i,
"name"_a = stat.name,
"load"_a = stat.util.gpu,
133 "freeV"_a = stat.mem.free,
134 "freeA"_a = (static_cast<double>(stat.mem.free) / stat.mem.total),
135 "usedV"_a = stat.mem.used,
136 "usedA"_a = (static_cast<double>(stat.mem.used) / stat.mem.total),
137 "totalV"_a = stat.mem.total
145 json += R
"("pids": [)";
146 if (compute.size() + graphics.size() > 0) {
149 for (
unsigned int j = 0; j < compute.size(); j++) {
150 auto &proc = compute[j];
153 R"("type": "proc", "pid": {pid},)"
154 R"("user": "{user}", "mem": {mem})"
157 "pid"_a = proc.pid,
"user"_a =
getProcUser(proc.pid),
158 "mem"_a = proc.usedGpuMemory
163 if (
json.back() ==
',')
168 auto &proc = graphics[j];
171 R"("type": "graphics", "pid": {pid},)"
172 R"("user": "{user}", "mem": {mem})"
175 "pid"_a = proc.pid,
"user"_a =
getProcUser(proc.pid),
176 "mem"_a = proc.usedGpuMemory
180 if (
json.back() ==
',')
187 if (
json.back() ==
',')
200 redi::ipstream in(
"ps -p" + std::to_string(pid) +
" -ouser=");
202 std::getline(in, tmp);
ObSensorGpu(std::string _name={"gpu"})
std::shared_ptr< spdlog::logger > _logger
Pointer to the spdlog logger.
std::vector< std::vector< nvmlProcessInfo_t > > fComputeProc
Compute processes (e.g. CUDA applications)
void process() override
Process function.
Simplified structure for device statistics.
ObSensor * _first
Pointer to first sensor.
ObSensor * _change
Pointer to change sensor.
std::string getProcUser(unsigned int) const
ObSensor * _second
Pointer to second sensor.
std::string name
Device name reported by NVML.
std::string json(const std::string name={"static"}) const override
unsigned int _count
Count of NVIDIA devices.
void speed(ObSensor *s1, ObSensor *s2, unsigned int timeout=1000) override
Calculates time change (speed).
bool enabled() const
Returns flag if sensor is enabled.
virtual ~ObSensorGpu() override
std::vector< nvmlDevice_t > fDevices
NVML device handles.
SensorType type() const
Returns sensor type.
std::vector< devStat_t > fStatus
Device statistics.
std::string _name
Sensor name.
void type(SensorType t)
Sets sensor type.
std::vector< std::vector< nvmlProcessInfo_t > > fGraphicsProc
Graphics processes (e.g. desktop environment)