diff --git a/crates/crates_io_database/src/models/user.rs b/crates/crates_io_database/src/models/user.rs index fa86bf8bd96..7222ba67625 100644 --- a/crates/crates_io_database/src/models/user.rs +++ b/crates/crates_io_database/src/models/user.rs @@ -142,6 +142,9 @@ pub struct OauthGithub { pub avatar: Option, /// In the process of being migrated from `users.gh_encrypted_token`. pub encrypted_token: Vec, + /// The last time we verified with GitHub what the GitHub username for this user was, and + /// whether the account was valid + pub last_sync: DateTime, /// In the process of being migrated from `users.gh_login`. pub login: String, /// Foreign key to the `users` table. @@ -193,6 +196,7 @@ impl NewOauthGithub<'_> { oauth_github::encrypted_token.eq(excluded(oauth_github::encrypted_token)), oauth_github::login.eq(excluded(oauth_github::login)), oauth_github::avatar.eq(excluded(oauth_github::avatar)), + oauth_github::last_sync.eq(Utc::now()), )) .get_result(&mut conn) .await diff --git a/crates/crates_io_database/src/schema.rs b/crates/crates_io_database/src/schema.rs index d62115bc904..fbd19784fa3 100644 --- a/crates/crates_io_database/src/schema.rs +++ b/crates/crates_io_database/src/schema.rs @@ -676,6 +676,8 @@ diesel::table! { avatar -> Nullable, /// Encrypted GitHub access token encrypted_token -> Bytea, + /// The last time we verified with GitHub what the GitHub username for this user was, and whether the account was valid + last_sync -> Timestamptz, /// GitHub username login -> Varchar, /// Crates.io user ID foreign key diff --git a/crates/crates_io_database_dump/src/dump-db.toml b/crates/crates_io_database_dump/src/dump-db.toml index af0a2fc0dcf..67f57077c84 100644 --- a/crates/crates_io_database_dump/src/dump-db.toml +++ b/crates/crates_io_database_dump/src/dump-db.toml @@ -170,6 +170,7 @@ account_id = "private" encrypted_token = "private" login = "private" avatar = "private" +last_sync = "private" [oauth_github.column_defaults] encrypted_token = "''" diff --git a/crates/crates_io_github/examples/test_github_client.rs b/crates/crates_io_github/examples/test_github_client.rs index f8c259d495d..94d0a0bfdc2 100644 --- a/crates/crates_io_github/examples/test_github_client.rs +++ b/crates/crates_io_github/examples/test_github_client.rs @@ -17,6 +17,9 @@ enum Request { #[clap(long, env = "GITHUB_ACCESS_TOKEN", hide_env_values = true)] access_token: SecretString, }, + GetUserById { + account_id: i64, + }, OrgByName { org_name: String, #[clap(long, env = "GITHUB_ACCESS_TOKEN", hide_env_values = true)] @@ -68,6 +71,10 @@ async fn main() -> Result<()> { let response = github_client.get_user(&name, &access_token).await?; println!("{response:#?}"); } + Request::GetUserById { account_id } => { + let response = github_client.get_user_by_id(account_id).await?; + println!("{response:#?}"); + } Request::OrgByName { org_name, access_token, diff --git a/crates/crates_io_github/src/lib.rs b/crates/crates_io_github/src/lib.rs index 2179e773b28..ccba54204e9 100644 --- a/crates/crates_io_github/src/lib.rs +++ b/crates/crates_io_github/src/lib.rs @@ -26,6 +26,7 @@ type Result = std::result::Result; pub trait GitHubClient: Send + Sync { async fn current_user(&self, auth: &AccessToken) -> Result; async fn get_user(&self, name: &str, auth: &AccessToken) -> Result; + async fn get_user_by_id(&self, account_id: i64) -> Result; async fn org_by_name(&self, org_name: &str, auth: &AccessToken) -> Result; async fn team_by_name( &self, @@ -213,6 +214,11 @@ impl GitHubClient for RealGitHubClient { self.request(&url, auth).await } + async fn get_user_by_id(&self, account_id: i64) -> Result { + let url = format!("/user/{account_id}"); + self._request(&url, std::convert::identity).await + } + async fn org_by_name(&self, org_name: &str, auth: &AccessToken) -> Result { let url = format!("/orgs/{org_name}"); self.request(&url, auth).await diff --git a/migrations/2026-05-05-180007-0000_add_last_sync_to_oauth_github/down.sql b/migrations/2026-05-05-180007-0000_add_last_sync_to_oauth_github/down.sql new file mode 100644 index 00000000000..9fe9fd6385d --- /dev/null +++ b/migrations/2026-05-05-180007-0000_add_last_sync_to_oauth_github/down.sql @@ -0,0 +1,2 @@ +ALTER TABLE IF EXISTS oauth_github +DROP COLUMN last_sync; diff --git a/migrations/2026-05-05-180007-0000_add_last_sync_to_oauth_github/up.sql b/migrations/2026-05-05-180007-0000_add_last_sync_to_oauth_github/up.sql new file mode 100644 index 00000000000..54f99339ab8 --- /dev/null +++ b/migrations/2026-05-05-180007-0000_add_last_sync_to_oauth_github/up.sql @@ -0,0 +1,8 @@ +-- safety-assured:start +-- Adding a column with a constant value default is safe because we're using Postgres > 11. +ALTER TABLE IF EXISTS oauth_github +-- Set existing rows' last sync value to 1970. +ADD COLUMN last_sync timestamptz NOT NULL DEFAULT to_timestamp(0); +-- safety-assured:end + +comment on column oauth_github.last_sync is 'The last time we verified with GitHub what the GitHub username for this user was, and whether the account was valid'; diff --git a/src/bin/crates-admin/enqueue_job.rs b/src/bin/crates-admin/enqueue_job.rs index 01b2fe5be0f..12d28a349ac 100644 --- a/src/bin/crates-admin/enqueue_job.rs +++ b/src/bin/crates-admin/enqueue_job.rs @@ -1,7 +1,8 @@ use anyhow::Result; use chrono::NaiveDate; use crates_io::db; -use crates_io::schema::{background_jobs, crates}; +use crates_io::models::OauthGithub; +use crates_io::schema::{background_jobs, crates, oauth_github}; use crates_io::worker::jobs; use crates_io_worker::BackgroundJob; use diesel::dsl::exists; @@ -56,6 +57,14 @@ pub enum Command { SyncUpdatesFeed, TrustpubCleanup, UpdateDownloads, + /// Sync the oldest batch of users with GitHub + UpdateUserBatch { + #[arg(long = "dry-run")] + dry_run: bool, + + #[arg(long = "batch-size", default_value = "100")] + batch_size: usize, + }, } pub async fn run(command: Command) -> Result<()> { @@ -169,6 +178,39 @@ pub async fn run(command: Command) -> Result<()> { jobs::UpdateDownloads.enqueue(&conn).await?; } } + + Command::UpdateUserBatch { + dry_run, + batch_size, + } => { + let oldest_oauth_github_records = oauth_github::table + .order(oauth_github::last_sync.asc()) + .limit(batch_size as i64) + .load::(&mut conn) + .await?; + + for oauth_github in oldest_oauth_github_records { + let user_id = oauth_github.user_id; + let github_id = oauth_github.account_id; + let old_username = oauth_github.login.clone(); + + let job = jobs::UpdateUserFromGithub { + dry_run, + account_id: oauth_github.account_id, + }; + + // Don't stop the whole batch if one user update errors, but do log the error + if let Err(e) = job.enqueue(&conn).await { + error!( + error = %e, + user_id, + github_id, + old_username, + "Error running UpdateUserFromGithub" + ); + } + } + } }; Ok(()) diff --git a/src/tests/util/github.rs b/src/tests/util/github.rs index 8bb9b9e92fd..f605dd6e777 100644 --- a/src/tests/util/github.rs +++ b/src/tests/util/github.rs @@ -53,6 +53,9 @@ impl MockData { mock.expect_current_user() .returning(|_auth| self.current_user()); + mock.expect_get_user_by_id() + .returning(|account_id| self.get_user_by_id(account_id)); + mock.expect_org_by_name() .returning(|org_name, _auth| self.org_by_name(org_name)); @@ -81,6 +84,21 @@ impl MockData { }) } + fn get_user_by_id(&self, account_id: i64) -> Result { + let user = self + .users + .iter() + .find(|user| user.id as i64 == account_id) + .ok_or_else(not_found)?; + Ok(GitHubUser { + id: user.id, + login: user.login.into(), + name: Some(user.name.into()), + email: Some(user.email.into()), + avatar_url: Some(format!("https://avatars.example.com/{}", user.id)), + }) + } + fn org_by_name(&self, org_name: &str) -> Result { let org = self .orgs diff --git a/src/tests/worker/mod.rs b/src/tests/worker/mod.rs index d2d7f98e57b..3780a2e51f9 100644 --- a/src/tests/worker/mod.rs +++ b/src/tests/worker/mod.rs @@ -9,3 +9,4 @@ mod squash_index; mod sync_admins; mod trustpub; mod update_default_version; +mod update_user_from_github; diff --git a/src/tests/worker/update_user_from_github.rs b/src/tests/worker/update_user_from_github.rs new file mode 100644 index 00000000000..e5504bf3ce9 --- /dev/null +++ b/src/tests/worker/update_user_from_github.rs @@ -0,0 +1,347 @@ +use crate::util::TestApp; +use crates_io::{ + controllers::session, + models::{OauthGithub, User}, + schema::{background_jobs, oauth_github}, + util::gh_token_encryption::GitHubTokenEncryption, + worker::jobs, +}; +use crates_io_github::{GitHubError, GitHubUser, MockGitHubClient}; +use crates_io_worker::BackgroundJob; +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use std::sync::LazyLock; + +const GITHUB_ID: i64 = 456789; +const EXISTING_LOGIN: &str = "my-login"; +static ENCRYPTED_TOKEN: LazyLock> = LazyLock::new(|| { + GitHubTokenEncryption::for_testing() + .encrypt("some random token") + .unwrap() +}); + +struct UpdateTest { + dry_run: bool, + existing_github_id: i64, + existing_username: &'static str, + github_current_user_response: Result, + github_user_by_id_response: Result, + expected_username: &'static str, + expected_last_sync_updated: bool, +} + +impl UpdateTest { + async fn run(self) { + let Self { + dry_run, + existing_github_id, + existing_username, + github_current_user_response, + github_user_by_id_response, + expected_username, + expected_last_sync_updated, + } = self; + + let mut github_mock = MockGitHubClient::new(); + github_mock + .expect_current_user() + .return_once(|_| github_current_user_response); + github_mock + .expect_get_user_by_id() + .return_once(|_| github_user_by_id_response); + + let (app, _) = TestApp::full().with_github(github_mock).empty().await; + let emails = &app.as_inner().emails; + let mut conn = app.db_conn().await; + + let original_gh_user = github_user(existing_github_id, existing_username); + let u = + session::save_user_to_database(&original_gh_user, &ENCRYPTED_TOKEN, emails, &mut conn) + .await + .unwrap(); + + let oauth_github_before_update = oauth_github::table + .filter(oauth_github::user_id.eq(u.id)) + .first::(&mut conn) + .await + .unwrap(); + let last_sync_before_update = oauth_github_before_update.last_sync; + + let job = jobs::UpdateUserFromGithub { + dry_run, + account_id: oauth_github_before_update.account_id, + }; + job.enqueue(&conn).await.unwrap(); + let _ = app.try_run_pending_background_jobs().await; + + let oauth_github_after_update = oauth_github::table + .filter(oauth_github::user_id.eq(u.id)) + .first::(&mut conn) + .await + .unwrap(); + assert_eq!(expected_username, oauth_github_after_update.login); + if expected_last_sync_updated { + assert_ne!(last_sync_before_update, oauth_github_after_update.last_sync); + } else { + assert_eq!(last_sync_before_update, oauth_github_after_update.last_sync); + } + + // For now, we want to update the `User` record too + let user_after_update = User::find(&conn, u.id).await.unwrap(); + assert_eq!(expected_username, user_after_update.gh_login); + + // Drain the failed job so the `TestAppInner::drop` empty-queue + // post-condition is satisfied. + diesel::delete(background_jobs::table) + .execute(&mut conn) + .await + .unwrap(); + } +} + +fn github_user(id: i64, username: &str) -> GitHubUser { + GitHubUser { + login: username.into(), + id: id as i32, + avatar_url: None, + email: None, + name: None, + } +} + +#[tokio::test(flavor = "multi_thread")] +async fn no_updates_needed() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + // What we have and what github has agree + existing_username: EXISTING_LOGIN, + github_current_user_response: Ok(github_user(GITHUB_ID, EXISTING_LOGIN)), + // Shouldn't be called in this test + github_user_by_id_response: Ok(github_user(GITHUB_ID, "wrong-user-info")), + expected_username: EXISTING_LOGIN, + // but still update last sync + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn yes_updates_needed() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + github_current_user_response: Ok(github_user(GITHUB_ID, "my-new-username")), + // Shouldn't be called in this test + github_user_by_id_response: Ok(github_user(GITHUB_ID, "wrong-user-info")), + expected_username: "my-new-username", + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn github_deleted() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + // If GitHub returns 404, this user has deleted their account. + github_current_user_response: Err(GitHubError::NotFound(anyhow::anyhow!("404 Not Found"))), + // Shouldn't be called in this test + github_user_by_id_response: Ok(github_user(GITHUB_ID, "wrong-user-info")), + expected_username: "ghost_1", + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn still_deleted() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + // We marked this user as deleted previously + existing_username: "ghost_1", + // GitHub still returns 404, they're still deleted + github_current_user_response: Err(GitHubError::NotFound(anyhow::anyhow!("404 Not Found"))), + // Shouldn't be called in this test + github_user_by_id_response: Ok(github_user(GITHUB_ID, "wrong-user-info")), + expected_username: "ghost_1", + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn github_unauthorized_fallback_success_no_update() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + // This could mean the user's oauth token has been revoked or similar + github_current_user_response: Err(GitHubError::Unauthorized(anyhow::anyhow!( + "Not allowed" + ))), + // Falling back to anonymous get_user_by_id shows no rename needed + github_user_by_id_response: Ok(github_user(GITHUB_ID, EXISTING_LOGIN)), + expected_username: EXISTING_LOGIN, + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn github_unauthorized_enterprise_user() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: "asmith_microsoft", + // This could mean the user's oauth token has been revoked or similar + github_current_user_response: Err(GitHubError::Unauthorized(anyhow::anyhow!( + "Not allowed" + ))), + // Shouldn't be called in this test + github_user_by_id_response: Ok(github_user(GITHUB_ID, "wrong-user-info")), + expected_username: "asmith_microsoft", + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn github_unauthorized_fallback_success_yes_update() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + // This could mean the user's oauth token has been revoked or similar + github_current_user_response: Err(GitHubError::Unauthorized(anyhow::anyhow!( + "Not allowed" + ))), + // Falling back to anonymous get_user_by_id shows yes rename needed + github_user_by_id_response: Ok(github_user(GITHUB_ID, "updated-username")), + expected_username: "updated-username", + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn github_unauthorized_fallback_not_found_deleted() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + // This could mean the user's oauth token has been revoked or similar + github_current_user_response: Err(GitHubError::Unauthorized(anyhow::anyhow!( + "Not allowed" + ))), + // Falling back to anonymous get_user_by_id shows the user has been deleted + github_user_by_id_response: Err(GitHubError::NotFound(anyhow::anyhow!("404 Not Found"))), + expected_username: "ghost_1", + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn github_unauthorized_fallback_other_error_no_update() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + // This could mean the user's oauth token has been revoked or similar + github_current_user_response: Err(GitHubError::Unauthorized(anyhow::anyhow!( + "Not allowed" + ))), + // Falling back to anonymous get_user_by_id fails too; try again later + github_user_by_id_response: Err(GitHubError::Other(anyhow::anyhow!( + "Over your rate limit" + ))), + expected_username: EXISTING_LOGIN, + expected_last_sync_updated: false, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn github_forbidden_fallback_success_yes_update() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + // This could mean the user's oauth token needs to be refreshed or similar + github_current_user_response: Err(GitHubError::Forbidden(anyhow::anyhow!("Not allowed"))), + // Falling back to anonymous get_user_by_id shows yes rename needed + github_user_by_id_response: Ok(github_user(GITHUB_ID, "updated-username")), + expected_username: "updated-username", + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn github_unavailable() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + // If GitHub returns some error we haven't accounted for, we can't know anything about + // this user. Try again later. + github_current_user_response: Err(GitHubError::Other(anyhow::anyhow!( + "9% uptime is one nine" + ))), + // Shouldn't be called in this test + github_user_by_id_response: Ok(github_user(GITHUB_ID, "wrong-user-info")), + expected_username: EXISTING_LOGIN, + expected_last_sync_updated: false, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn undeleted() { + UpdateTest { + dry_run: false, + existing_github_id: GITHUB_ID, + existing_username: "ghost_1", + // Not sure how often this happens, but if we marked an account as ghost but we get a + // valid user again, we should update their username + github_current_user_response: Ok(github_user(GITHUB_ID, "my-new-username")), + // Shouldn't be called in this test + github_user_by_id_response: Ok(github_user(GITHUB_ID, "wrong-user-info")), + expected_username: "my-new-username", + expected_last_sync_updated: true, + } + .run() + .await; +} + +#[tokio::test(flavor = "multi_thread")] +async fn dry_run_mode_doesnt_update() { + UpdateTest { + dry_run: true, + existing_github_id: GITHUB_ID, + existing_username: EXISTING_LOGIN, + github_current_user_response: Ok(github_user(GITHUB_ID, "my-new-username")), + // Shouldn't be called in this test + github_user_by_id_response: Ok(github_user(GITHUB_ID, "wrong-user-info")), + expected_username: EXISTING_LOGIN, + expected_last_sync_updated: false, + } + .run() + .await; +} diff --git a/src/worker/jobs/mod.rs b/src/worker/jobs/mod.rs index b3cb1e32915..c098e5fd068 100644 --- a/src/worker/jobs/mod.rs +++ b/src/worker/jobs/mod.rs @@ -18,6 +18,7 @@ mod sync_admins; pub mod trustpub; mod typosquat; mod update_default_version; +mod update_user_from_github; pub use self::analyze_crate_file::AnalyzeCrateFile; pub use self::archive_version_downloads::ArchiveVersionDownloads; @@ -42,3 +43,4 @@ pub use self::send_publish_notifications::SendPublishNotificationsJob; pub use self::sync_admins::SyncAdmins; pub use self::typosquat::CheckTyposquat; pub use self::update_default_version::UpdateDefaultVersion; +pub use self::update_user_from_github::UpdateUserFromGithub; diff --git a/src/worker/jobs/update_user_from_github.rs b/src/worker/jobs/update_user_from_github.rs new file mode 100644 index 00000000000..908cf6bb41e --- /dev/null +++ b/src/worker/jobs/update_user_from_github.rs @@ -0,0 +1,186 @@ +use crate::{ + models::OauthGithub, + schema::{oauth_github, users}, + worker::Environment, +}; +use chrono::Utc; +use crates_io_github::{GitHubError, GitHubUser}; +use crates_io_worker::BackgroundJob; +use diesel::prelude::*; +use diesel_async::{AsyncConnection, AsyncPgConnection, RunQueryDsl}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use tracing::{error, info}; + +#[derive(Serialize, Deserialize)] +pub struct UpdateUserFromGithub { + /// Dry run will fetch updates from GitHub and log what it would change, but does not actually + /// update the database. + pub dry_run: bool, + /// GitHub ID + pub account_id: i64, +} + +impl BackgroundJob for UpdateUserFromGithub { + const JOB_NAME: &'static str = "update_user_from_github"; + const DEDUPLICATED: bool = true; + // These jobs aren't urgent and shouldn't page anyone if they take a long time. + const PRIORITY: i16 = -15; + + type Context = Arc; + + /// For the specified user, query the GitHub API for the user's current information to see if + /// their account has been deleted or renamed. Update the `users` and `oauth_github` tables, + /// saving the current time in `last_sync` even if the user information hasn't changed. + async fn run(&self, ctx: Self::Context) -> anyhow::Result<()> { + let mut conn = ctx.deadpool.get().await?; + + // If no oauth_github info with this account id is found, then the record has been deleted + // since this job was enqueued. Stop and exit with success so we don't retry the job. + let oauth_github = oauth_github::table + .filter(oauth_github::account_id.eq(self.account_id)) + .first::(&mut conn) + .await?; + + info!( + "Starting UpdateUserFromGithub ({}): user_id {}, github_id {}, old username {}", + if self.dry_run { "DRY RUN" } else { "FOR REAL" }, + oauth_github.user_id, + self.account_id, + oauth_github.login, + ); + + let github_user = self.refresh_user(&ctx, &oauth_github).await?; + + if self.dry_run { + info!( + "Dry run UpdateUserFromGithub proposed update for crates.io user {} \ + from username `{}` to username `{}`", + oauth_github.user_id, oauth_github.login, github_user.login, + ); + } else { + self.apply_update(&oauth_github, &github_user, &mut conn) + .await; + } + + Ok(()) + } +} + +impl UpdateUserFromGithub { + /// Given the current environment's context, request information from GitHub using the user's + /// API token. + async fn refresh_user( + &self, + ctx: &Arc, + oauth_github: &OauthGithub, + ) -> anyhow::Result { + let github = ctx.github.as_ref(); + let token = ctx + .config + .gh_token_encryption + .decrypt(&oauth_github.encrypted_token)?; + + match github.current_user(&token).await { + Ok(github_user) => Ok(github_user), + // If the user is not found, the account has been deleted. Update to the ghost + // username. + Err(GitHubError::NotFound(_)) => Ok(self.ghost_user(oauth_github.user_id)), + // Unauthorized/forbidden could mean: + // + // - the token we have for this user is out-of-date + // - the user has revoked crates.io's oauth access + // + // In those cases, try to request the user's info via an unauthenticated GitHub + // API request, unless they are a GitHub Enterprise Managed User as indicated by an + // underscore in their username because we have to be authorized by the managing + // enterprise to see any information on enterprise managed users. + Err(GitHubError::Unauthorized(_)) | Err(GitHubError::Forbidden(_)) => { + // Enterprise managed users are the only ones that should contain underscores. + if oauth_github.login.contains('_') { + // We can't get updated info, so keep what we have. + Ok(GitHubUser { + login: oauth_github.login.clone(), + id: self.account_id as i32, + // The other fields are not used in `apply_update`. + avatar_url: Default::default(), + email: Default::default(), + name: Default::default(), + }) + } else { + match github.get_user_by_id(self.account_id).await { + Ok(github_user) => Ok(github_user), + Err(GitHubError::NotFound(_)) => Ok(self.ghost_user(oauth_github.user_id)), + // Not sure how we could get Unauthorized/Forbidden for an anonymous + // API request. We could get rate limited though; if that's the case, + // stop and try this user again later. + Err(e) => Err(e.into()), + } + } + } + // If we get another sort of error, it may be transient; stop and try this user + // again later. + Err(e @ GitHubError::Other(_)) => Err(e.into()), + } + } + + /// Given the information from GitHub about the current user, make the appropriate changes to + /// the `users` and `oauth_github` tables. + async fn apply_update( + &self, + oauth_github: &OauthGithub, + github_user: &GitHubUser, + conn: &mut AsyncPgConnection, + ) { + // Use a transaction so that we either update both or neither the `users` record and the + // corresponding `oauth_github` record. If neither are updated, log and continue to the + // next user rather than stopping-- hopefully we'll get that user updated next time. + if let Err(e) = conn + .transaction(async |conn| { + // This will be removed when we no longer sync crates.io usernames with GitHub. + // (The transaction can be removed when this is removed as well) + // It's only needed if there's a change in username. + if oauth_github.login != github_user.login { + diesel::update(users::table) + .filter(users::id.eq(oauth_github.user_id)) + .set(users::gh_login.eq(&github_user.login)) + .execute(conn) + .await?; + } + + // This update is needed even if there's no change in username to set the + // `last_sync` time to `now`. + diesel::update(oauth_github::table) + .filter(oauth_github::account_id.eq(self.account_id)) + .set(( + oauth_github::login.eq(&github_user.login), + oauth_github::last_sync.eq(Utc::now()), + )) + .execute(conn) + .await?; + + Ok::<(), diesel::result::Error>(()) + }) + .await + { + // Database update failed; it's ok to not update this user this round. + // Better luck next time. + error!( + "Could not update user ID {} from username {} to username {}: {e}", + oauth_github.user_id, oauth_github.login, github_user.login, + ); + } + } + + /// If this user has been deleted, ensure their username has been changed to + /// `ghost_{crates.io id}` to ensure uniqueness by creating a `GitHubUser` by hand. + fn ghost_user(&self, user_id: i32) -> GitHubUser { + GitHubUser { + avatar_url: None, + email: None, + id: self.account_id as i32, + login: format!("ghost_{}", user_id), + name: None, + } + } +} diff --git a/src/worker/mod.rs b/src/worker/mod.rs index 02caf2703ac..9f863afa8f0 100644 --- a/src/worker/mod.rs +++ b/src/worker/mod.rs @@ -43,6 +43,7 @@ impl RunnerExt for Runner> { .register_job_type::() .register_job_type::() .register_job_type::() + .register_job_type::() .register_job_type::() .register_job_type::() .register_job_type::()