Skip to content

Commit 253762d

Browse files
committed
fix: handle microsecond unit correctly in metric names
Also parse metrics whose names are not yet known on a best-effort basis instead of hard-failing.

Signed-off-by: Utku Ozdemir <utkuozdemir@gmail.com>
1 parent cfa5cd1 commit 253762d

4 files changed

Lines changed: 108 additions & 39 deletions

File tree

go.mod

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/prometheus/common v0.63.0
1111
github.com/prometheus/exporter-toolkit v0.14.0
1212
github.com/stretchr/testify v1.10.0
13+
github.com/thejerf/slogassert v0.3.4
1314
)
1415

1516
require (
@@ -23,15 +24,15 @@ require (
2324
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
2425
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
2526
github.com/pmezard/go-difflib v1.0.0 // indirect
26-
github.com/prometheus/client_model v0.6.1 // indirect
27+
github.com/prometheus/client_model v0.6.2 // indirect
2728
github.com/prometheus/procfs v0.16.0 // indirect
2829
github.com/xhit/go-str2duration/v2 v2.1.0 // indirect
29-
golang.org/x/crypto v0.36.0 // indirect
30-
golang.org/x/net v0.37.0 // indirect
31-
golang.org/x/oauth2 v0.28.0 // indirect
32-
golang.org/x/sync v0.12.0 // indirect
33-
golang.org/x/sys v0.31.0 // indirect
34-
golang.org/x/text v0.23.0 // indirect
30+
golang.org/x/crypto v0.37.0 // indirect
31+
golang.org/x/net v0.39.0 // indirect
32+
golang.org/x/oauth2 v0.29.0 // indirect
33+
golang.org/x/sync v0.13.0 // indirect
34+
golang.org/x/sys v0.32.0 // indirect
35+
golang.org/x/text v0.24.0 // indirect
3536
google.golang.org/protobuf v1.36.6 // indirect
3637
gopkg.in/yaml.v2 v2.4.0 // indirect
3738
gopkg.in/yaml.v3 v3.0.1 // indirect

go.sum

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
3838
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
3939
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
4040
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
41-
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
42-
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
41+
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
42+
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
4343
github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA98k=
4444
github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18=
4545
github.com/prometheus/exporter-toolkit v0.14.0 h1:NMlswfibpcZZ+H0sZBiTjrA3/aBFHkNZqE+iCj5EmRg=
@@ -58,20 +58,22 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl
5858
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
5959
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
6060
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
61+
github.com/thejerf/slogassert v0.3.4 h1:VoTsXixRbXMrRSSxDjYTiEDCM4VWbsYPW5rB/hX24kM=
62+
github.com/thejerf/slogassert v0.3.4/go.mod h1:0zn9ISLVKo1aPMTqcGfG1o6dWwt+Rk574GlUxHD4rs8=
6163
github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc=
6264
github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU=
63-
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
64-
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
65-
golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
66-
golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
67-
golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
68-
golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
69-
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
70-
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
71-
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
72-
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
73-
golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
74-
golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
65+
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
66+
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
67+
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
68+
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
69+
golang.org/x/oauth2 v0.29.0 h1:WdYw2tdTK1S8olAzWHdgeqfy+Mtm9XNhv/xJsY65d98=
70+
golang.org/x/oauth2 v0.29.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
71+
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
72+
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
73+
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
74+
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
75+
golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
76+
golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
7577
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
7678
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
7779
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

internal/exporter/exporter.go

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ func New(ctx context.Context, prefix string, nvidiaSmiCommand string, qFieldsRaw
8787
return nil, err
8888
}
8989

90-
qFieldToMetricInfoMap := BuildQFieldToMetricInfoMap(prefix, qFieldToRFieldMap)
90+
qFieldToMetricInfoMap := BuildQFieldToMetricInfoMap(prefix, qFieldToRFieldMap, logger)
9191

9292
infoLabels := getLabels(requiredFields)
9393
exporter := GPUExporter{
@@ -381,17 +381,18 @@ func parseSanitizedValueWithBestEffort(
381381
func BuildQFieldToMetricInfoMap(
382382
prefix string,
383383
qFieldtoRFieldMap map[QField]RField,
384+
logger *slog.Logger,
384385
) map[QField]MetricInfo {
385386
result := make(map[QField]MetricInfo)
386387
for qField, rField := range qFieldtoRFieldMap {
387-
result[qField] = BuildMetricInfo(prefix, rField)
388+
result[qField] = BuildMetricInfo(prefix, rField, logger)
388389
}
389390

390391
return result
391392
}
392393

393-
func BuildMetricInfo(prefix string, rField RField) MetricInfo {
394-
fqName, multiplier := BuildFQNameAndMultiplier(prefix, rField)
394+
func BuildMetricInfo(prefix string, rField RField, logger *slog.Logger) MetricInfo {
395+
fqName, multiplier := BuildFQNameAndMultiplier(prefix, rField, logger)
395396
desc := prometheus.NewDesc(fqName, string(rField), []string{"uuid"}, nil)
396397

397398
return MetricInfo{
@@ -401,28 +402,45 @@ func BuildMetricInfo(prefix string, rField RField) MetricInfo {
401402
}
402403
}
403404

404-
func BuildFQNameAndMultiplier(prefix string, rField RField) (string, float64) {
405+
func BuildFQNameAndMultiplier(prefix string, rField RField, logger *slog.Logger) (string, float64) {
405406
rFieldStr := string(rField)
406407
suffixTransformed := rFieldStr
407408
multiplier := 1.0
408409
split := strings.Split(rFieldStr, " ")[0]
409410

410-
//nolint:gocritic
411-
if strings.HasSuffix(rFieldStr, " [W]") {
411+
switch {
412+
case strings.HasSuffix(rFieldStr, " [W]"):
412413
suffixTransformed = split + "_watts"
413-
} else if strings.HasSuffix(rFieldStr, " [MHz]") {
414+
case strings.HasSuffix(rFieldStr, " [MHz]"):
414415
suffixTransformed = split + "_clock_hz"
415416
multiplier = 1000000
416-
} else if strings.HasSuffix(rFieldStr, " [MiB]") {
417+
case strings.HasSuffix(rFieldStr, " [MiB]"):
417418
suffixTransformed = split + "_bytes"
418419
multiplier = 1048576
419-
} else if strings.HasSuffix(rFieldStr, " [%]") {
420+
case strings.HasSuffix(rFieldStr, " [%]"):
420421
suffixTransformed = split + "_ratio"
421422
multiplier = 0.01
423+
case strings.HasSuffix(rFieldStr, " [us]"):
424+
suffixTransformed = split + "_seconds"
425+
multiplier = 0.000001
426+
}
427+
428+
suffixTransformed = strings.ReplaceAll(suffixTransformed, ".", "_")
429+
suffixTransformed = util.ToSnakeCase(suffixTransformed)
430+
431+
if strings.ContainsAny(suffixTransformed, " []") {
432+
suffixTransformed = strings.ReplaceAll(suffixTransformed, " [", "_")
433+
suffixTransformed = strings.ReplaceAll(suffixTransformed, "]", "")
434+
435+
logger.Error("returned field contains unexpected characters, "+
436+
"it is parsed it with best effort, but it might get renamed in the future. "+
437+
"please report it in the project's issue tracker",
438+
"rfield_name", rFieldStr,
439+
"parsed_name", suffixTransformed,
440+
)
422441
}
423442

424-
metricName := util.ToSnakeCase(strings.ReplaceAll(suffixTransformed, ".", "_"))
425-
fqName := prometheus.BuildFQName(prefix, "", metricName)
443+
fqName := prometheus.BuildFQName(prefix, "", suffixTransformed)
426444

427445
return fqName, multiplier
428446
}

internal/exporter/exporter_test.go

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
_ "embed"
66
"fmt"
7+
"log/slog"
78
"os"
89
"os/exec"
910
"slices"
@@ -15,6 +16,7 @@ import (
1516
"github.com/prometheus/client_golang/prometheus"
1617
"github.com/stretchr/testify/assert"
1718
"github.com/stretchr/testify/require"
19+
"github.com/thejerf/slogassert"
1820

1921
"github.com/utkuozdemir/nvidia_gpu_exporter/internal/exporter"
2022
)
@@ -88,7 +90,11 @@ func TestTransformRawMultiplier(t *testing.T) {
8890
func TestBuildFQNameAndMultiplierRegular(t *testing.T) {
8991
t.Parallel()
9092

91-
fqName, multiplier := exporter.BuildFQNameAndMultiplier("prefix", "encoder.stats.sessionCount")
93+
fqName, multiplier := exporter.BuildFQNameAndMultiplier(
94+
"prefix",
95+
"encoder.stats.sessionCount",
96+
slogt.New(t),
97+
)
9298

9399
assertFloat(t, 1, multiplier)
94100
assert.Equal(t, "prefix_encoder_stats_session_count", fqName)
@@ -97,7 +103,11 @@ func TestBuildFQNameAndMultiplierRegular(t *testing.T) {
97103
func TestBuildFQNameAndMultiplierWatts(t *testing.T) {
98104
t.Parallel()
99105

100-
fqName, multiplier := exporter.BuildFQNameAndMultiplier("prefix", "power.draw [W]")
106+
fqName, multiplier := exporter.BuildFQNameAndMultiplier(
107+
"prefix",
108+
"power.draw [W]",
109+
slogt.New(t),
110+
)
101111

102112
assertFloat(t, 1, multiplier)
103113
assert.Equal(t, "prefix_power_draw_watts", fqName)
@@ -106,7 +116,11 @@ func TestBuildFQNameAndMultiplierWatts(t *testing.T) {
106116
func TestBuildFQNameAndMultiplierMiB(t *testing.T) {
107117
t.Parallel()
108118

109-
fqName, multiplier := exporter.BuildFQNameAndMultiplier("prefix", "memory.total [MiB]")
119+
fqName, multiplier := exporter.BuildFQNameAndMultiplier(
120+
"prefix",
121+
"memory.total [MiB]",
122+
slogt.New(t),
123+
)
110124

111125
assertFloat(t, 1048576, multiplier)
112126
assert.Equal(t, "prefix_memory_total_bytes", fqName)
@@ -118,6 +132,7 @@ func TestBuildFQNameAndMultiplierMHZ(t *testing.T) {
118132
fqName, multiplier := exporter.BuildFQNameAndMultiplier(
119133
"prefix",
120134
"clocks.current.graphics [MHz]",
135+
slogt.New(t),
121136
)
122137

123138
assertFloat(t, 1000000, multiplier)
@@ -127,16 +142,33 @@ func TestBuildFQNameAndMultiplierMHZ(t *testing.T) {
127142
func TestBuildFQNameAndMultiplierRatio(t *testing.T) {
128143
t.Parallel()
129144

130-
fqName, multiplier := exporter.BuildFQNameAndMultiplier("prefix", "fan.speed [%]")
145+
fqName, multiplier := exporter.BuildFQNameAndMultiplier("prefix", "fan.speed [%]", slogt.New(t))
131146

132147
assertFloat(t, 0.01, multiplier)
133148
assert.Equal(t, "prefix_fan_speed_ratio", fqName)
134149
}
135150

151+
func TestBuildFQNameAndMultiplierMicroseconds(t *testing.T) {
152+
t.Parallel()
153+
154+
fqName, multiplier := exporter.BuildFQNameAndMultiplier(
155+
"prefix",
156+
"clocks_event_reasons_counters.sw_thermal_slowdown [us]",
157+
slogt.New(t),
158+
)
159+
160+
assertFloat(t, 0.000001, multiplier)
161+
assert.Equal(t, "prefix_clocks_event_reasons_counters_sw_thermal_slowdown_seconds", fqName)
162+
}
163+
136164
func TestBuildFQNameAndMultiplierNoPrefix(t *testing.T) {
137165
t.Parallel()
138166

139-
fqName, multiplier := exporter.BuildFQNameAndMultiplier("", "encoder.stats.sessionCount")
167+
fqName, multiplier := exporter.BuildFQNameAndMultiplier(
168+
"",
169+
"encoder.stats.sessionCount",
170+
slogt.New(t),
171+
)
140172

141173
assertFloat(t, 1, multiplier)
142174
assert.Equal(t, "encoder_stats_session_count", fqName)
@@ -145,18 +177,34 @@ func TestBuildFQNameAndMultiplierNoPrefix(t *testing.T) {
145177
func TestBuildMetricInfo(t *testing.T) {
146178
t.Parallel()
147179

148-
metricInfo := exporter.BuildMetricInfo("prefix", "encoder.stats.sessionCount")
180+
metricInfo := exporter.BuildMetricInfo("prefix", "encoder.stats.sessionCount", slogt.New(t))
149181

150182
assertFloat(t, 1, metricInfo.ValueMultiplier)
151183
assert.Equal(t, prometheus.GaugeValue, metricInfo.MType)
152184
}
153185

186+
func TestBuildMetricInfoInvalidName(t *testing.T) {
187+
t.Parallel()
188+
189+
handler := slogassert.New(t, slog.LevelError, nil)
190+
logger := slog.New(handler)
191+
192+
exporter.BuildMetricInfo("prefix", "foo.bar [asdf]", logger)
193+
194+
handler.AssertMessage(
195+
"returned field contains unexpected characters, it is parsed it with best effort, " +
196+
"but it might get renamed in the future. please report it in the project's issue tracker",
197+
)
198+
}
199+
154200
func TestBuildQFieldToMetricInfoMap(t *testing.T) {
155201
t.Parallel()
156202

203+
logger := slogt.New(t)
157204
qFieldToMetricInfoMap := exporter.BuildQFieldToMetricInfoMap(
158205
"prefix",
159206
map[exporter.QField]exporter.RField{"aaa": "AAA", "bbb": "BBB"},
207+
logger,
160208
)
161209

162210
assert.Len(t, qFieldToMetricInfoMap, 2)

0 commit comments

Comments
 (0)