Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion internal/hub/systems/system_manager.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package systems

import (
"context"
"errors"
"fmt"
"time"
Expand Down Expand Up @@ -45,6 +46,8 @@ type SystemManager struct {
systems *store.Store[string, *System] // Thread-safe store of active systems
sshConfig *ssh.ClientConfig // SSH client configuration for system connections
smartFetchMap *expirymap.ExpiryMap[smartFetchState] // Stores last SMART fetch time/result; TTL is only for cleanup
ctx context.Context // Cancelled when the manager is shutting down
cancel context.CancelFunc // Cancels ctx
}

// hubLike defines the interface requirements for the hub dependency.
Expand All @@ -60,10 +63,13 @@ type hubLike interface {
// NewSystemManager creates a new SystemManager instance with the provided hub.
// The hub must implement the hubLike interface to provide database and alert functionality.
func NewSystemManager(hub hubLike) *SystemManager {
ctx, cancel := context.WithCancel(context.Background())
return &SystemManager{
systems: store.New(map[string]*System{}),
hub: hub,
smartFetchMap: expirymap.New[smartFetchState](time.Hour),
ctx: ctx,
cancel: cancel,
}
}

Expand Down Expand Up @@ -103,7 +109,13 @@ func (sm *SystemManager) Initialize() error {
sleepTime := time.Duration(delta) * time.Millisecond

for _, system := range systems {
time.Sleep(sleepTime)
select {
case <-sm.ctx.Done():
// Abort if the manager has been shut down (e.g. test cleanup)
// to avoid starting updater goroutines against a torn-down hub. See #1950.
return
case <-time.After(sleepTime):
}
_ = sm.AddSystem(system)
}
}()
Expand Down Expand Up @@ -238,6 +250,11 @@ func (sm *SystemManager) onRecordAfterDeleteSuccess(e *core.RecordEvent) error {
// It validates required fields, initializes the system context, and starts the update goroutine.
// Returns error if a system with the same ID already exists.
func (sm *SystemManager) AddSystem(sys *System) error {
if sm.ctx.Err() != nil {
// Manager is shutting down; do not start new updater goroutines
// against a hub that may be torn down. See #1950.
return sm.ctx.Err()
}
if sm.systems.Has(sys.Id) {
return errSystemExists
}
Expand Down
19 changes: 19 additions & 0 deletions internal/hub/systems/system_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
package systems

import (
"context"
"testing"

"github.com/henrygd/beszel/internal/entities/system"
"github.com/pocketbase/pocketbase/tools/store"
)

func TestCombinedData_MigrateDeprecatedFields(t *testing.T) {
Expand Down Expand Up @@ -157,3 +159,20 @@ func TestCombinedData_MigrateDeprecatedFields(t *testing.T) {
}
})
}

// TestAddSystemRefusedAfterShutdown verifies that AddSystem returns an error
// once the manager has been shut down, so the staggered Initialize starter
// cannot spawn updater goroutines against a torn-down hub. See #1950.
func TestAddSystemRefusedAfterShutdown(t *testing.T) {
sm := &SystemManager{systems: store.New(map[string]*System{})}
sm.ctx, sm.cancel = context.WithCancel(context.Background())
sm.cancel()

err := sm.AddSystem(&System{Id: "sys", Host: "127.0.0.1"})
if err == nil {
t.Fatalf("AddSystem returned nil error after shutdown")
}
if sm.systems.Has("sys") {
t.Fatalf("system was added to store after shutdown")
}
}
5 changes: 5 additions & 0 deletions internal/hub/systems/systems_test_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ func (sm *SystemManager) SetSystemStatusInDB(systemID string, status string) boo

// TESTING ONLY: RemoveAllSystems removes all systems from the store
func (sm *SystemManager) RemoveAllSystems() {
// Signal shutdown first so any in-flight Initialize staggered-start goroutine
// stops adding new systems against a hub that is about to be torn down. See #1950.
if sm.cancel != nil {
sm.cancel()
}
for _, system := range sm.systems.GetAll() {
sm.RemoveSystem(system.Id)
}
Expand Down
Loading