Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions openedx/core/djangoapps/course_groups/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,3 +311,25 @@ class Meta:
course_user_group = models.ForeignKey(CourseUserGroup, on_delete=models.CASCADE) # noqa: DJ012
email = models.CharField(blank=True, max_length=255, db_index=True)
course_id = CourseKeyField(max_length=255)

@classmethod
def delete_by_user_value(cls, value, field):
"""
Redacts the email field then deletes records where
``field`` equals ``value``.

Returns True if deleted, False if no matching records found.
"""
filter_kwargs = {field: value}
records_matching_user_value = cls.objects.filter(**filter_kwargs)

if not records_matching_user_value.exists():
return False

# Extract IDs to prevent over-affecting unrelated records
ids = list(records_matching_user_value.values_list('id', flat=True))

# Redact email before deletion
cls.objects.filter(id__in=ids).update(email='redacted@retired.invalid')
cls.objects.filter(id__in=ids).delete()
return True
51 changes: 50 additions & 1 deletion openedx/core/djangoapps/course_groups/tests/test_cohorts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
import ddt
import pytest
from django.contrib.auth.models import AnonymousUser, User # pylint: disable=imported-auth-user
from django.db import IntegrityError
from django.db import IntegrityError, connection
from django.http import Http404
from django.test import TestCase
from django.test.utils import CaptureQueriesContext
from opaque_keys.edx.keys import CourseKey
from opaque_keys.edx.locator import CourseLocator
from openedx_events.testing import OpenEdxEventsTestMixin
Expand Down Expand Up @@ -807,3 +808,51 @@ def test_retired_user_with_no_cohort_returns_false(self):

assert not was_retired
assert self.cohort_assignment.email == known_learner_email

def test_email_redacted_before_delete(self):
"""
Verify email is redacted (UPDATE) before the record is deleted, using
ID-based filtering to prevent over-affecting unrelated rows.
"""
# Create an unrelated assignment with the same email in a different course
other_course_key = CourseKey.from_string('course-v1:edX+OtherX+Other_Course')
other_cohort = CourseUserGroup.objects.create(
name='OtherCohort',
course_id=other_course_key,
group_type=CourseUserGroup.COHORT,
)
other_assignment = UnregisteredLearnerCohortAssignments.objects.create(
course_user_group=other_cohort,
course_id=other_course_key,
email='learner@example.com',
)

with CaptureQueriesContext(connection) as context:
was_retired = UnregisteredLearnerCohortAssignments.delete_by_user_value(
value='learner@example.com',
field='email'
)

assert was_retired

updates = [q for q in context.captured_queries if 'UPDATE' in q['sql']]
deletes = [q for q in context.captured_queries if 'DELETE' in q['sql']]

# Exactly one bulk UPDATE and one bulk DELETE
assert len(updates) == 1
assert len(deletes) == 1

# Both must use ID-based filtering
assert '"id" IN' in updates[0]['sql']
assert '"id" IN' in deletes[0]['sql']

# UPDATE must precede DELETE
assert context.captured_queries.index(updates[0]) < context.captured_queries.index(deletes[0])

# UPDATE must set email to the redacted sentinel value
assert 'redacted@retired.invalid' in updates[0]['sql']

# Both records must be deleted
assert not UnregisteredLearnerCohortAssignments.objects.filter(
id__in=[self.cohort_assignment.id, other_assignment.id]
).exists()
Original file line number Diff line number Diff line change
Expand Up @@ -1504,6 +1504,71 @@ def test_retire_user(self, mock_remove_profile_images, mock_get_profile_image_na
assert not CourseEnrollmentAllowed.objects.filter(email=self.original_email).exists()
assert not UnregisteredLearnerCohortAssignments.objects.filter(email=self.original_email).exists()

@mock.patch('openedx.core.djangoapps.user_api.accounts.views.get_profile_image_names')
@mock.patch('openedx.core.djangoapps.user_api.accounts.views.remove_profile_images')
def test_cohort_assignment_email_redacted_before_delete(
self, mock_remove_profile_images, mock_get_profile_image_names
):
"""
Verify that UnregisteredLearnerCohortAssignments email is redacted (UPDATE) before the
record is deleted, so downstream systems (e.g. Snowflake) never see the raw email in
CDC/replication logs.

Uses CaptureQueriesContext to assert the UPDATE precedes the DELETE at the SQL level,
and that both operations use ID-based filtering to prevent over-affecting unrelated rows.
"""
other_cohort = CourseUserGroup.objects.create(
name="OtherCohort",
course_id=self.course_key,
group_type=CourseUserGroup.COHORT,
)
other_assignment = UnregisteredLearnerCohortAssignments.objects.create(
course_user_group=other_cohort,
course_id=CourseKey.from_string('course-v1:edX+OtherX+Other_Course'),
email=self.original_email,
)

data = {'username': self.original_username}
with CaptureQueriesContext(connection) as context:
self.post_and_assert_status(data)

cohort_updates = [
q for q in context.captured_queries
if 'UPDATE' in q['sql'] and 'course_groups_unregisteredlearnercohortassignments' in q['sql']
]
cohort_deletes = [
q for q in context.captured_queries
if 'DELETE' in q['sql'] and 'course_groups_unregisteredlearnercohortassignments' in q['sql']
]

# Exactly one bulk UPDATE and one bulk DELETE — not per-record queries
assert len(cohort_updates) == 1, f"Expected 1 UPDATE, got {len(cohort_updates)}"
assert len(cohort_deletes) == 1, f"Expected 1 DELETE, got {len(cohort_deletes)}"

# UPDATE must use ID-based filtering to avoid over-redacting
assert '"id" IN' in cohort_updates[0]['sql'], (
f"UPDATE should use ID-based filtering: {cohort_updates[0]['sql']}"
)
# DELETE must use ID-based filtering to avoid over-deletion
assert '"id" IN' in cohort_deletes[0]['sql'], (
f"DELETE should use ID-based filtering: {cohort_deletes[0]['sql']}"
)

# UPDATE must appear before DELETE in the query stream
update_pos = context.captured_queries.index(cohort_updates[0])
delete_pos = context.captured_queries.index(cohort_deletes[0])
assert update_pos < delete_pos, "Email redaction (UPDATE) must precede record deletion (DELETE)"

# UPDATE must set email to the redacted sentinel value
assert 'redacted@retired.invalid' in cohort_updates[0]['sql'], (
f"UPDATE should set email to 'redacted@retired.invalid': {cohort_updates[0]['sql']}"
)

# Both cohort assignments for original_email must be gone
assert not UnregisteredLearnerCohortAssignments.objects.filter(
id__in=[self.cohort_assignment.id, other_assignment.id]
).exists()

def test_retire_user_twice_idempotent(self):
data = {'username': self.original_username}
self.post_and_assert_status(data)
Expand Down
Loading