From 002e3fef38b1c2db583ff0c887eae6dfd1fd69d0 Mon Sep 17 00:00:00 2001 From: "Joseph R. Quinn" <423821+quinnjr@users.noreply.github.com> Date: Fri, 1 May 2026 12:15:35 -0400 Subject: [PATCH] feat(users): backfill users.name from oauth_github.login `users.name` is currently set only when the GitHub profile has a display name, so a sizable fraction of accounts have it as NULL. That makes the column unusable as a guaranteed identifier and means any feature that wants a non-NULL name has to fall back to gh_login on its own. Fill in the NULLs with `oauth_github.login` (the GitHub username) so every account has a name. The DO-block walks `users.id` in windows of 5000 ids and COMMITs between batches so row locks release between iterations and concurrent session-login upserts aren't blocked. lock_timeout caps each lock wait and statement_timeout caps the DO statement as a whole (it is measured per client statement, not per batch) in case of unexpected contention. Idempotent: filtered on `users.name IS NULL`, so re-running is a no-op once every account has a name. Follows the same out-of-band pattern as data_oauth_github.sql -- run with `psql -f migrations/data_user_name_backfill.sql` and do NOT wrap in BEGIN/COMMIT, since the COMMIT inside the DO block requires the block to be at the top level. The session-login upsert path is unchanged: future logins will still write the GitHub display name when one is set, overwriting any backfilled value with the user's actual chosen name. --- migrations/data_user_name_backfill.sql | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 migrations/data_user_name_backfill.sql diff --git a/migrations/data_user_name_backfill.sql b/migrations/data_user_name_backfill.sql new file mode 100644 index 00000000000..dca63de7721 --- /dev/null +++ b/migrations/data_user_name_backfill.sql @@ -0,0 +1,40 @@ +-- Backfill `users.name` from `oauth_github.login` for users whose `name` is +-- still NULL.
+-- `users.name` is currently set only when the GitHub profile has a display
+-- name, so users without one have NULL there; this fills those in with the
+-- GitHub username. Users with no `oauth_github` row are left untouched.
+--
+-- Iterates over `users.id` in windows of `batch_size` ids and COMMITs after
+-- each window so row locks are released between iterations. Run it outside an
+-- explicit transaction (`psql -f migrations/data_user_name_backfill.sql` is
+-- fine; do NOT wrap with BEGIN/COMMIT, since COMMIT inside DO requires the
+-- block to be at the top level). Idempotent: the UPDATE filters on
+-- `users.name IS NULL`, so a re-run only touches rows that are still unset.
+
+-- lock_timeout bounds each lock wait. statement_timeout is measured per client
+-- statement, so it bounds the DO block as a whole rather than each batch.
+SET lock_timeout = '5s';
+SET statement_timeout = '60s';
+
+DO $$
+DECLARE
+    lo BIGINT;  -- BIGINT so `pos + batch_size` cannot overflow the INT range
+    hi BIGINT;
+    pos BIGINT;
+    batch_size CONSTANT BIGINT := 5000;  -- width of each id window
+BEGIN
+    SELECT MIN(id), MAX(id) INTO lo, hi FROM users WHERE name IS NULL;
+    IF lo IS NULL THEN RETURN; END IF;  -- no NULL names: nothing to do
+    pos := lo;
+    WHILE pos <= hi LOOP
+        UPDATE users
+        SET name = oauth_github.login
+        FROM oauth_github  -- assumes one row per user_id -- TODO confirm
+        WHERE oauth_github.user_id = users.id
+            AND users.name IS NULL
+            AND users.id >= pos
+            AND users.id < pos + batch_size;
+        COMMIT;  -- ends the batch transaction and releases its row locks
+        pos := pos + batch_size;
+    END LOOP;
+END $$;