Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 176 additions & 42 deletions internal/alerts/alerts_system.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"strings"
"time"

"github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/system"

"github.com/pocketbase/dbx"
Expand All @@ -19,7 +20,6 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
dbx.NewExp("system={:system} AND name!='Status'", dbx.Params{"system": systemRecord.Id}),
)
if err != nil || len(alertRecords) == 0 {
// log.Println("no alerts found for system")
return nil
}

Expand Down Expand Up @@ -71,24 +71,42 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
continue
}
val = float64(data.Stats.Battery[0])
default:
// check for container alert
if strings.HasPrefix(name, "Container ") {
containerName := strings.TrimPrefix(name, "Container ")
// find container in data.Containers
for _, ctr := range data.Containers {
if ctr.Name == containerName {
val = float64(ctr.Health)
unit = ""
break
}
}
}
}

triggered := alertRecord.GetBool("triggered")
threshold := alertRecord.GetFloat("value")

// Battery alert has inverted logic: trigger when value is BELOW threshold
lowAlert := isLowAlert(name)

// CONTINUE
// For normal alerts: IF not triggered and curValue <= threshold, OR triggered and curValue > threshold
// For low alerts (Battery): IF not triggered and curValue >= threshold, OR triggered and curValue < threshold
if lowAlert {
if (!triggered && val >= threshold) || (triggered && val < threshold) {
if strings.HasPrefix(name, "Container ") {
if !triggered && val == float64(container.DockerHealthUnhealthy) {
// if not triggered and unhealthy, trigger
} else if triggered && val != float64(container.DockerHealthUnhealthy) {
// if triggered and not unhealthy, resolve
} else {
continue
}
} else {
if (!triggered && val <= threshold) || (triggered && val > threshold) {
continue
lowAlert := isLowAlert(name)
if lowAlert {
if (!triggered && val >= threshold) || (triggered && val < threshold) {
continue
}
} else {
if (!triggered && val <= threshold) || (triggered && val > threshold) {
continue
}
}
}

Expand All @@ -107,12 +125,18 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst

// send alert immediately if min is 1 - no need to sum up values.
if min == 1 {
if lowAlert {
alert.triggered = val < threshold
if strings.HasPrefix(name, "Container ") {
alert.triggered = val == float64(container.DockerHealthUnhealthy)
go am.sendContainerAlert(alert)
} else {
alert.triggered = val > threshold
lowAlert := isLowAlert(name)
if lowAlert {
alert.triggered = val < threshold
} else {
alert.triggered = val > threshold
}
go am.sendSystemAlert(alert)
}
go am.sendSystemAlert(alert)
continue
}

Expand All @@ -124,10 +148,10 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
validAlerts = append(validAlerts, alert)
}

systemStats := []struct {
var systemStats []struct {
Stats []byte `db:"stats"`
Created types.DateTime `db:"created"`
}{}
}

err = am.hub.DB().
Select("stats", "created").
Expand Down Expand Up @@ -244,6 +268,46 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
alert.count++
}
}

var containerStatsRecords []struct {
Stats []byte `db:"stats"`
Created types.DateTime `db:"created"`
}
_ = am.hub.DB().Select("stats", "created").From("container_stats").
Where(dbx.NewExp("system={:system} AND type='1m' AND created > {:created}", dbx.Params{
"system": systemRecord.Id,
"created": oldestTime.Add(-time.Second * 90),
})).All(&containerStatsRecords)
// log.Println("Found container stats records:", len(containerStatsRecords))

for _, stat := range containerStatsRecords {
var appStats []container.Stats
if err := json.Unmarshal(stat.Stats, &appStats); err != nil {
continue
}
statTime := stat.Created.Time()
for j := range validAlerts {
alert := &validAlerts[j]
if !strings.HasPrefix(alert.name, "Container ") {
continue
}
if statTime.Before(alert.time) {
continue
}
containerName := strings.TrimPrefix(alert.name, "Container ")
for _, ctr := range appStats {
if ctr.Name == containerName {
// log.Printf("DEBUG ALERT: Found stats for %s: Health=%d time=%v", containerName, ctr.Health, statTime)
if ctr.Health == container.DockerHealthUnhealthy {
alert.val += 1
}
alert.count++
break
}
}
}
}
// log.Printf("DEBUG ALERT END: %s val=%f count=%d min=%d", alert.name, alert.val, alert.count, alert.min)
// sum up vals for each alert
for _, alert := range validAlerts {
switch alert.name {
Expand All @@ -268,30 +332,40 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
}
alert.val = float64(maxTemp)
default:
alert.val = alert.val / float64(alert.count)
if !strings.HasPrefix(alert.name, "Container ") {
alert.val = alert.val / float64(alert.count)
}
}
minCount := float32(alert.min) / 1.2
// log.Println("alert", alert.name, "val", alert.val, "threshold", alert.threshold, "triggered", alert.triggered)
// log.Printf("%s: val %f | count %d | min-count %f | threshold %f\n", alert.name, alert.val, alert.count, minCount, alert.threshold)
// pass through alert if count is greater than or equal to minCount
if float32(alert.count) >= minCount {
// Battery alert has inverted logic: trigger when value is BELOW threshold
lowAlert := isLowAlert(alert.name)
if lowAlert {
if !alert.triggered && alert.val < alert.threshold {
if strings.HasPrefix(alert.name, "Container ") {
if !alert.triggered && float32(alert.val) >= minCount {
alert.triggered = true
go am.sendSystemAlert(alert)
} else if alert.triggered && alert.val >= alert.threshold {
go am.sendContainerAlert(alert)
} else if alert.triggered && float32(alert.val) < minCount {
alert.triggered = false
go am.sendSystemAlert(alert)
go am.sendContainerAlert(alert)
}
} else {
if !alert.triggered && alert.val > alert.threshold {
alert.triggered = true
go am.sendSystemAlert(alert)
} else if alert.triggered && alert.val <= alert.threshold {
alert.triggered = false
go am.sendSystemAlert(alert)
// Battery alert has inverted logic: trigger when value is BELOW threshold
lowAlert := isLowAlert(alert.name)
if lowAlert {
if !alert.triggered && alert.val < alert.threshold {
alert.triggered = true
go am.sendSystemAlert(alert)
} else if alert.triggered && alert.val >= alert.threshold {
alert.triggered = false
go am.sendSystemAlert(alert)
}
} else {
if !alert.triggered && alert.val > alert.threshold {
alert.triggered = true
go am.sendSystemAlert(alert)
} else if alert.triggered && alert.val <= alert.threshold {
alert.triggered = false
go am.sendSystemAlert(alert)
}
}
}
}
Expand Down Expand Up @@ -319,18 +393,26 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
}

var subject string
lowAlert := isLowAlert(alert.name)
if alert.triggered {
if lowAlert {
subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
if strings.HasPrefix(alert.name, "Container ") {
if alert.triggered {
subject = fmt.Sprintf("%s %s is unhealthy", systemName, titleAlertName)
} else {
subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
subject = fmt.Sprintf("%s %s is healthy", systemName, titleAlertName)
}
} else {
if lowAlert {
subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
lowAlert := isLowAlert(alert.name)
if alert.triggered {
if lowAlert {
subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
} else {
subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
}
} else {
subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
if lowAlert {
subject = fmt.Sprintf("%s %s above threshold", systemName, titleAlertName)
} else {
subject = fmt.Sprintf("%s %s below threshold", systemName, titleAlertName)
}
}
}
minutesLabel := "minute"
Expand All @@ -344,9 +426,61 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {

alert.alertRecord.Set("triggered", alert.triggered)
if err := am.hub.Save(alert.alertRecord); err != nil {
// app.Logger().Error("failed to save alert record", "err", err)
am.hub.Logger().Error("failed to save alert record", "err", err)
return
}
// manually create alert history record to ensure it's logged
if alert.triggered {
_, _ = createAlertHistoryRecord(am.hub, alert.alertRecord)
}

am.SendAlert(AlertMessageData{
UserID: alert.alertRecord.GetString("user"),
SystemID: alert.systemRecord.Id,
Title: subject,
Message: body,
Link: am.hub.MakeLink("system", alert.systemRecord.Id),
LinkText: "View " + systemName,
})
}

func (am *AlertManager) sendContainerAlert(alert SystemAlertData) {
systemName := alert.systemRecord.GetString("name")
containerName := strings.TrimPrefix(alert.name, "Container ")

var subject string
if alert.triggered {
subject = fmt.Sprintf("%s Container %s is unhealthy", systemName, containerName)
} else {
subject = fmt.Sprintf("%s Container %s is healthy", systemName, containerName)
}

var body string
if alert.min == 1 {
if alert.triggered {
body = fmt.Sprintf("Container %s is unhealthy.", containerName)
} else {
body = fmt.Sprintf("Container %s is healthy.", containerName)
}
} else {
minutesLabel := "minutes"
if alert.triggered {
body = fmt.Sprintf("Container %s was unhealthy for the majority of the previous %d %s.", containerName, alert.min, minutesLabel)
} else {
body = fmt.Sprintf("Container %s has recovered and is healthy.", containerName)
}
}

alert.alertRecord.Set("triggered", alert.triggered)
if err := am.hub.Save(alert.alertRecord); err != nil {
am.hub.Logger().Error("failed to save alert record", "err", err)
return
}
// manually create alert history record to ensure it's logged
if alert.triggered {
_, _ = createAlertHistoryRecord(am.hub, alert.alertRecord)
}

am.SendAlert(AlertMessageData{
UserID: alert.alertRecord.GetString("user"),
SystemID: alert.systemRecord.Id,
Expand Down
4 changes: 2 additions & 2 deletions internal/entities/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ type Stats struct {
NetworkSent float64 `json:"ns" cbor:"3,keyasint"`
NetworkRecv float64 `json:"nr" cbor:"4,keyasint"`

Health DockerHealth `json:"-" cbor:"5,keyasint"`
Status string `json:"-" cbor:"6,keyasint"`
Health DockerHealth `json:"h" cbor:"5,keyasint"`
Status string `json:"s" cbor:"6,keyasint"`
Id string `json:"-" cbor:"7,keyasint"`
Image string `json:"-" cbor:"8,keyasint"`
// PrevCpu [2]uint64 `json:"-"`
Expand Down
74 changes: 74 additions & 0 deletions internal/migrations/1737815000_convert_alerts_name_to_text.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package migrations

import (
"encoding/json"

"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core"
m "github.com/pocketbase/pocketbase/migrations"
)

// init registers the migration that converts the `name` field of the `alerts`
// collection from a select field into a free-form text field.
//
// The up step edits the serialized field list stored in the `_collections`
// table directly with raw SQL instead of going through the collection API
// (per the original author's note: to bypass validation and visibility
// issues). It returns nil without writing anything when the collection has no
// field called `name`.
//
// The down step is intentionally a no-op: restoring the select field would
// require rebuilding its original option list, which this migration does not
// record.
func init() {
	m.Register(func(app core.App) error {
		// Raw row of the `_collections` table; `fields` holds the JSON-encoded
		// field definitions of the collection.
		var row struct {
			Id     string `db:"id"`
			Fields string `db:"fields"`
		}

		// 1. Read the raw field definitions of the alerts collection.
		err := app.DB().
			NewQuery("SELECT id, fields FROM _collections WHERE name='alerts'").
			One(&row)
		if err != nil {
			return err
		}

		// 2. Decode into generic maps so that keys this migration does not
		// know about are carried over when the list is written back.
		var fields []map[string]any
		if err := json.Unmarshal([]byte(row.Fields), &fields); err != nil {
			return err
		}

		// 3. Rewrite the `name` field as text and drop the options that only
		// made sense for the previous select definition.
		converted := false
		for _, f := range fields {
			if name, ok := f["name"].(string); !ok || name != "name" {
				continue
			}
			// f is a map, so these writes update the entry held in fields.
			f["type"] = "text"
			delete(f, "values")
			delete(f, "maxSelect")
			converted = true
			break
		}
		if !converted {
			// Nothing to convert; leave the collection untouched.
			return nil
		}

		// 4. Re-encode the modified field list.
		encoded, err := json.Marshal(fields)
		if err != nil {
			return err
		}

		// 5. Write it back with a parameterized UPDATE.
		_, err = app.DB().
			NewQuery("UPDATE _collections SET fields={:fields} WHERE id={:id}").
			Bind(dbx.Params{
				"fields": string(encoded),
				"id":     row.Id,
			}).
			Execute()

		return err
	}, func(app core.App) error {
		// Intentionally a no-op: the original select options are not recorded,
		// so the field is left as text on revert.
		return nil
	})
}
Loading