diff --git a/credstore/file_store_test.go b/credstore/file_store_test.go index 1006b06..4867f02 100644 --- a/credstore/file_store_test.go +++ b/credstore/file_store_test.go @@ -182,12 +182,6 @@ func TestFileStore_ConcurrentWrites(t *testing.T) { ) } } - - // Verify no lock files remain - lockPath := filePath + ".lock" - if _, err := os.Stat(lockPath); !os.IsNotExist(err) { - t.Errorf("Lock file still exists after all saves completed") - } } func TestFileStore_SaveEmptyClientID(t *testing.T) { diff --git a/credstore/filelock.go b/credstore/filelock.go index f5398b7..6f52bc1 100644 --- a/credstore/filelock.go +++ b/credstore/filelock.go @@ -1,70 +1,58 @@ package credstore import ( - "errors" + "context" "fmt" - "os" "time" + + "github.com/gofrs/flock" ) const ( - lockMaxRetries = 50 - lockRetryDelay = 100 * time.Millisecond - staleLockTimeout = 30 * time.Second + // lockAcquireTimeout caps how long acquireFileLock will wait for a peer + // to release. The kernel releases advisory locks automatically on process + // death, so this is only a safety net for genuinely stuck holders. + lockAcquireTimeout = 30 * time.Second + + // lockRetryInterval controls the poll frequency while waiting for a lock. + lockRetryInterval = 100 * time.Millisecond ) -// fileLock represents a file lock. +// fileLock is an exclusive advisory lock on a sibling `.lock` file. +// The lock is backed by gofrs/flock, which uses fcntl on POSIX and +// LockFileEx on Windows — the kernel releases the lock automatically if the +// holder crashes, so no stale-lock heuristics are needed. type fileLock struct { - lockFile *os.File - lockPath string + fl *flock.Flock } -// acquireFileLock acquires an exclusive lock on the token file. -// Uses a separate lock file to coordinate access across processes. +// acquireFileLock acquires an exclusive lock on `.lock`. The lock +// file is created if missing and is left on disk after release (the file's +// presence is not what protects access — the advisory lock is). A stranded +// lock file from a crashed process does not block new acquirers because once +// no process holds the advisory lock, including after a crash, it becomes +// immediately available to be acquired again. func acquireFileLock(filePath string) (*fileLock, error) { lockPath := filePath + ".lock" - for range lockMaxRetries { - lockFile, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600) - if err == nil { - return &fileLock{ - lockFile: lockFile, - lockPath: lockPath, - }, nil - } - - if os.IsExist(err) { - if info, statErr := os.Stat(lockPath); statErr == nil { - if time.Since(info.ModTime()) > staleLockTimeout { - if remErr := os.Remove(lockPath); remErr != nil && !os.IsNotExist(remErr) { - return nil, fmt.Errorf( - "failed to remove stale lock file %s: %w", - lockPath, - remErr, - ) - } - continue - } - } - time.Sleep(lockRetryDelay) - continue - } + ctx, cancel := context.WithTimeout(context.Background(), lockAcquireTimeout) + defer cancel() - return nil, fmt.Errorf("failed to acquire file lock: %w", err) + fl := flock.New(lockPath) + locked, err := fl.TryLockContext(ctx, lockRetryInterval) + if err != nil { + return nil, fmt.Errorf("acquire lock %q: %w", lockPath, err) + } + if !locked { + return nil, fmt.Errorf( + "timeout waiting for file lock %q after %v", lockPath, lockAcquireTimeout, + ) } - return nil, fmt.Errorf( - "timeout waiting for file lock after %v", - time.Duration(lockMaxRetries)*lockRetryDelay, - ) + return &fileLock{fl: fl}, nil } -// release releases the file lock. -func (fl *fileLock) release() error { - var closeErr error - if fl.lockFile != nil { - closeErr = fl.lockFile.Close() - } - removeErr := os.Remove(fl.lockPath) - return errors.Join(closeErr, removeErr) +// release drops the advisory lock. The lock file itself is left on disk. +func (l *fileLock) release() error { + return l.fl.Unlock() } diff --git a/credstore/filelock_test.go b/credstore/filelock_test.go index a2b0e46..90ff56b 100644 --- a/credstore/filelock_test.go +++ b/credstore/filelock_test.go @@ -5,7 +5,6 @@ import ( "path/filepath" "sync" "testing" - "time" ) func TestAcquireAndRelease(t *testing.T) { @@ -26,9 +25,13 @@ func TestAcquireAndRelease(t *testing.T) { t.Errorf("release() error: %v", err) } - if _, err := os.Stat(lockPath); !os.IsNotExist(err) { - t.Error("lock file was not removed after release") + // The lock file is intentionally left on disk after release; advisory + // locking is what protects access, not the file's existence. + next, err := acquireFileLock(target) + if err != nil { + t.Fatalf("re-acquire after release: %v", err) } + _ = next.release() } func TestConcurrentLocks(t *testing.T) { @@ -69,7 +72,11 @@ func TestConcurrentLocks(t *testing.T) { wg.Wait() } -func TestStaleLockRemoval(t *testing.T) { +// TestAcquireOrphanedLockFile verifies that a leftover lock file from a +// crashed process does not block new acquirers. With kernel-level advisory +// locking (fcntl/LockFileEx), no process holds the lock once the crashed +// process is gone, so a new acquirer succeeds immediately. +func TestAcquireOrphanedLockFile(t *testing.T) { dir := t.TempDir() target := filepath.Join(dir, "tokens.json") lockPath := target + ".lock" @@ -78,16 +85,11 @@ func TestStaleLockRemoval(t *testing.T) { if err != nil { t.Fatal(err) } - f.Close() - - staleTime := time.Now().Add(-60 * time.Second) - if err := os.Chtimes(lockPath, staleTime, staleTime); err != nil { - t.Fatalf("os.Chtimes: %v", err) - } + _ = f.Close() lock, err := acquireFileLock(target) if err != nil { - t.Fatalf("acquireFileLock() with stale lock: %v", err) + t.Fatalf("acquireFileLock with orphaned file: %v", err) } _ = lock.release() } diff --git a/go.mod b/go.mod index db871d8..16bdb57 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.25.0 require ( github.com/appleboy/go-httpretry v0.11.0 + github.com/gofrs/flock v0.13.0 github.com/zalando/go-keyring v0.2.6 golang.org/x/sync v0.20.0 ) diff --git a/go.sum b/go.sum index 42b9534..9de3ea0 100644 --- a/go.sum +++ b/go.sum @@ -8,6 +8,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ= github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c= +github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= +github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=