Skip to content

Commit d920a96

Browse files
committed
fix: lock when doing clenaup
1 parent 10e1281 commit d920a96

2 files changed

Lines changed: 61 additions & 22 deletions

File tree

pkg/engine/idle_cleanup.go

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ type IdleCleanupManager struct {
1919
checkInterval time.Duration
2020
lastActivity atomic.Int64 // Unix timestamp in seconds
2121
stopChan chan struct{}
22-
cleanupCount atomic.Int64 // Counter for cleanup operations (useful for metrics)
23-
activeProcesses atomic.Int32 // Number of active image processing operations
24-
cleanupMu sync.Mutex // Protects cleanup operations
22+
cleanupCount atomic.Int64 // Counter for cleanup operations (useful for metrics)
23+
activeProcesses atomic.Int32 // Number of active image processing operations
24+
processingMu sync.RWMutex // RWMutex: RLock for image processing, Lock for cleanup (blocks all processing)
2525
wg sync.WaitGroup // Tracks background goroutine to prevent leaks
26-
cleanupFunc func() // Function to call for cleanup (defaults to bimg.VipsCacheDropAll, can be mocked in tests)
26+
cleanupFunc func() // Function to call for cleanup (defaults to bimg.VipsCacheDropAll, can be mocked in tests)
2727
}
2828

2929
// NewIdleCleanupManager creates a new IdleCleanupManager
@@ -88,25 +88,28 @@ func (m *IdleCleanupManager) RecordActivity() {
8888
m.lastActivity.Store(time.Now().Unix())
8989
}
9090

91-
// BeginProcessing increments the active process counter
91+
// BeginProcessing acquires a read lock and increments the active process counter
9292
// Call this before starting image processing to prevent cleanup during processing
93+
// This will BLOCK if cleanup is currently running
9394
func (m *IdleCleanupManager) BeginProcessing() {
9495
if !m.enabled {
9596
return
9697
}
9798

99+
m.processingMu.RLock() // Acquire read lock - blocks if cleanup is running
98100
m.activeProcesses.Add(1)
99101
m.RecordActivity()
100102
}
101103

102-
// EndProcessing decrements the active process counter
104+
// EndProcessing decrements the active process counter and releases the read lock
103105
// Call this after image processing completes (use defer for safety)
104106
func (m *IdleCleanupManager) EndProcessing() {
105107
if !m.enabled {
106108
return
107109
}
108110

109111
m.activeProcesses.Add(-1)
112+
m.processingMu.RUnlock() // Release read lock
110113
}
111114

112115
// GetCleanupCount returns the number of cleanup operations performed
@@ -163,28 +166,34 @@ func (m *IdleCleanupManager) checkAndCleanup() {
163166
}
164167

165168
// performCleanup actually performs the libvips cache cleanup
166-
// It is thread-safe and will only perform cleanup if no image processing is active
169+
// It acquires an exclusive write lock, blocking all image processing until cleanup completes
170+
// This ensures no images are being processed during cleanup
167171
func (m *IdleCleanupManager) performCleanup() {
168-
// Lock to ensure only one cleanup at a time
169-
m.cleanupMu.Lock()
170-
defer m.cleanupMu.Unlock()
172+
// Acquire write lock - this will:
173+
// 1. Wait for all current image processing (RLock holders) to complete
174+
// 2. Block any new image processing from starting
175+
m.processingMu.Lock()
176+
defer m.processingMu.Unlock()
171177

172-
// Check if there are active image processing operations
178+
// Safety check: verify no active processes (should always be 0 due to lock)
173179
activeCount := m.activeProcesses.Load()
174-
if activeCount > 0 {
175-
monitoring.Log().Debug("Skipping cleanup due to active processing",
180+
if activeCount != 0 {
181+
monitoring.Log().Error("CRITICAL: Active processes detected despite holding cleanup lock!",
176182
zap.Int32("activeProcesses", activeCount))
177183
return
178184
}
179185

186+
monitoring.Log().Info("Acquired cleanup lock, all image processing blocked",
187+
zap.Int32("activeProcesses", activeCount))
188+
180189
// Get memory stats before cleanup
181190
memBefore := bimg.VipsMemory()
182191

183192
monitoring.Log().Info("Performing libvips cache cleanup",
184193
zap.Int64("memoryBefore", memBefore.Memory),
185194
zap.Int64("memoryAllocations", memBefore.Allocations))
186195

187-
// Perform cleanup - safe because no active processing
196+
// Perform cleanup - safe because we have exclusive lock
188197
// Use the injected cleanup function (defaults to bimg.VipsCacheDropAll)
189198
if m.cleanupFunc != nil {
190199
m.cleanupFunc()

pkg/engine/idle_cleanup_test.go

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -522,24 +522,54 @@ func TestIdleCleanupManager_BeginEndProcessing_Concurrent(t *testing.T) {
522522
assert.Equal(t, int32(0), mgr.activeProcesses.Load())
523523
}
524524

525-
// TestIdleCleanupManager_PerformCleanup_WithActiveProcesses verifies cleanup is skipped when processing
525+
// TestIdleCleanupManager_PerformCleanup_WithActiveProcesses verifies cleanup blocks until processing completes
526526
func TestIdleCleanupManager_PerformCleanup_WithActiveProcesses(t *testing.T) {
527527
t.Parallel()
528528

529529
mgr := NewIdleCleanupManager(true, 15)
530530
mgr.cleanupFunc = mockCleanupFunc // Use mock to avoid interfering with libvips
531531

532-
// Mark as having active processes
533-
mgr.BeginProcessing()
534-
defer mgr.EndProcessing()
532+
// Start processing in a goroutine
533+
processingDone := make(chan bool)
534+
go func() {
535+
mgr.BeginProcessing()
536+
time.Sleep(100 * time.Millisecond) // Hold the lock for 100ms
537+
mgr.EndProcessing()
538+
processingDone <- true
539+
}()
535540

541+
// Give processing time to start
542+
time.Sleep(20 * time.Millisecond)
543+
544+
// Try to perform cleanup - this should BLOCK until processing completes
545+
cleanupDone := make(chan bool)
536546
initialCount := mgr.GetCleanupCount()
547+
go func() {
548+
mgr.performCleanup()
549+
cleanupDone <- true
550+
}()
537551

538-
// Try to perform cleanup
539-
mgr.performCleanup()
552+
// Verify cleanup hasn't completed yet (still blocked)
553+
select {
554+
case <-cleanupDone:
555+
t.Fatal("cleanup should be blocked while processing is active")
556+
case <-time.After(50 * time.Millisecond):
557+
// Good - cleanup is blocked as expected
558+
}
559+
560+
// Wait for processing to complete
561+
<-processingDone
540562

541-
// Cleanup should be skipped, count should not increase
542-
assert.Equal(t, initialCount, mgr.GetCleanupCount(), "cleanup should be skipped when processes are active")
563+
// Now cleanup should complete
564+
select {
565+
case <-cleanupDone:
566+
// Good - cleanup completed after processing finished
567+
case <-time.After(200 * time.Millisecond):
568+
t.Fatal("cleanup should complete after processing ends")
569+
}
570+
571+
// Cleanup should have run, count should increase
572+
assert.Equal(t, initialCount+1, mgr.GetCleanupCount(), "cleanup should run after processes complete")
543573
}
544574

545575
// TestIdleCleanupManager_PerformCleanup_NoActiveProcesses verifies cleanup runs when no processing

0 commit comments

Comments
 (0)