Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pycaching.errors import Error
from pycaching.geocaching import Geocaching

from .helpers import sanitize_cookies
from .cassette_sanitizer import sanitize_betamax_interaction

username = os.environ.get("PYCACHING_TEST_USERNAME") or "USERNAMEPLACEHOLDER"
password = os.environ.get("PYCACHING_TEST_PASSWORD") or "PASSWORDPLACEHOLDER"
Expand All @@ -25,7 +25,7 @@
config.cassette_library_dir = str(cassette_dir)
config.define_cassette_placeholder("<USERNAME>", quote_plus(username))
config.define_cassette_placeholder("<PASSWORD>", quote_plus(password))
config.before_record(callback=sanitize_cookies)
config.before_record(callback=sanitize_betamax_interaction)
Betamax.register_serializer(PrettyJSONSerializer)


Expand Down
133 changes: 133 additions & 0 deletions test/cassette_sanitizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""Shared Betamax cassette sanitization helpers for recorded fixtures.

The suite records authenticated traffic, so cassettes may capture user-specific
values that are irrelevant to assertions: auth tokens, page bootstrap user
metadata, ASP.NET hidden fields, and home-location coordinates in URLs.
This module keeps the scrubbing rules in one place for both unittest and
pytest-based Betamax setups.
"""

import re

from betamax.cassette.cassette import Placeholder

# Names of cookies whose values are treated as secrets: every value found for
# one of these names is registered under the "<AUTH COOKIE>" placeholder.
CLASSIFIED_COOKIES = (
    "gspkauth",
    "__RequestVerificationToken",
    "jwt",  # JWT token; contains user-related information: username, ids, OAuth token
)

# Each rule must contain exactly one capture group with the sensitive value;
# any surrounding context that needs grouping uses a non-capturing (?:...) group.
# Variables are exported for use in tests that assert on placeholder values.
#
# Insertion order matters: rules are applied in dict order, so the wholesale
# "<CHROME SETTINGS>" replacement is registered before the finer-grained ones.
PLACEHOLDER_RULES = {
    # This bootstrap object is not parsed by library code, so replacing its
    # contents wholesale keeps fixtures smaller and hides a lot of account data.
    "<CHROME SETTINGS>": (re.compile(r"window\['chromeSettings'\]\s*=\s*\{([\s\S]*?)\};"),),
    "<AUTH COOKIE>": (
        # ";" is excluded so that a match inside a Cookie / Set-Cookie header
        # stops at the separator instead of capturing "value;" as the secret.
        re.compile(r"__RequestVerificationToken=([^&;\"\s]+)"),
        re.compile(r'"__RequestVerificationToken"\s*:\s*"([^"]+)"'),
        re.compile(r'name="__RequestVerificationToken"[^>]*value="([^"]+)"'),
    ),
    "<VIEWSTATE>": (
        re.compile(r'name="__VIEWSTATE"[^>]*value="([^"]+)"'),
        re.compile(r"__VIEWSTATE=([^&\"\s]+)"),
    ),
    "<VIEWSTATE1>": (
        re.compile(r'name="__VIEWSTATE1"[^>]*value="([^"]+)"'),
        re.compile(r"__VIEWSTATE1=([^&\"\s]+)"),
    ),
    "<VIEWSTATEGENERATOR>": (
        re.compile(r'name="__VIEWSTATEGENERATOR"[^>]*value="([^"]+)"'),
        re.compile(r"__VIEWSTATEGENERATOR=([^&\"\s]+)"),
    ),
    "<USERNAME>": (
        re.compile(r'"username"\s*:\s*"([^"]+)"'),
        re.compile(r'"Username"\s*:\s*"([^"]+)"'),
    ),
    "<USER GUID>": (re.compile(r'"(?:publicGuid|PublicGuid|userPublicGuid)"\s*:\s*"([^"]+)"'),),
    "<USER ID>": (
        re.compile(r'"accountId"\s*:\s*(\d+)'),
        re.compile(r'"gcUser"\s*:\s*\{[\s\S]*?"id"\s*:\s*(\d+)'),
    ),
    "<USER CODE>": (
        re.compile(r'"referenceCode"\s*:\s*"(P[A-Z0-9]+)"'),
        re.compile(r"window\['userRef'\]\s*=\s*'([^']+)'"),
        re.compile(r'"userRef"\s*:\s*"([^"]+)"'),
    ),
    "<USER TOKEN>": (
        re.compile(r"\buserToken\s*=\s*'([^']+)'"),
        re.compile(r'"userToken"\s*:\s*"([^"]+)"'),
        # The "tkn=" prefix is context only, hence non-capturing.
        re.compile(r"(?:[?&]tkn=)([^&\"\s]+)"),
    ),
    "<HOME LOCATION>": (re.compile(r'"(?:homeLocation|HomeLocation)"\s*:\s*"([^"]+)"'),),
    "<HOME COORDS>": (
        re.compile(r'"(?:homeCoords|HomeCoords)"\s*:\s*"([^"]+)"'),
        re.compile(r"(?:[?&;]saddr=)(-?\d+(?:\.\d+)?(?:%2C|,)-?\d+(?:\.\d+)?)"),
    ),
    "<USER CREATED DATE>": (re.compile(r'"dateCreated"\s*:\s*"([^"]+)"'),),
    "<CLIENT IP COORDINATE>": (re.compile(r'"clientIpCoordinate"\s*:\s*(\{[^}]+\})'),),
}


def sanitize_betamax_interaction(interaction, cassette):
    """Scan one Betamax interaction and register a placeholder per secret found.

    Classified cookie values are registered first; afterwards every text
    fragment of the interaction is matched against each pattern listed in
    ``PLACEHOLDER_RULES``.
    """
    _collect_cookie_placeholders(interaction, cassette)

    for fragment in _iter_interaction_texts(interaction):
        for label, regexes in PLACEHOLDER_RULES.items():
            for regex in regexes:
                for hit in regex.findall(fragment):
                    # findall returns tuples for multi-group patterns; the
                    # last group is the one holding the sensitive value.
                    secret = hit[-1] if isinstance(hit, tuple) else hit
                    _add_placeholder(cassette, label, secret)


def _collect_cookie_placeholders(interaction, cassette):
    """Register ``<AUTH COOKIE>`` placeholders for classified cookie values.

    Values are gathered from three places: the cookie jar of the response
    built by ``interaction.as_response()``, the ``Cookie`` header of that
    response's request, and the raw ``Set-Cookie`` response headers stored in
    ``interaction.data``.

    Args:
        interaction: Object providing ``as_response()`` and a ``data`` mapping.
        cassette: Object whose ``placeholders`` list receives the new entries.
    """
    response = interaction.as_response()
    response_cookies = response.cookies

    request_cookies = {}
    # Split on ";" and strip, so pairs are parsed correctly whether or not the
    # separator is followed by a space.
    for cookie in (response.request.headers.get("Cookie") or "").split(";"):
        name, sep, value = cookie.strip().partition("=")
        if sep:
            request_cookies[name] = value

    # Header names are case-insensitive, and a header value may be stored
    # either as a list of strings or as one plain string; a plain string must
    # not be iterated character by character.
    set_cookie_headers = []
    response_headers = interaction.data.get("response", {}).get("headers", {})
    for header_name, header_value in response_headers.items():
        if header_name.lower() != "set-cookie":
            continue
        if isinstance(header_value, str):
            set_cookie_headers.append(header_value)
        else:
            set_cookie_headers.extend(header_value)

    for name in CLASSIFIED_COOKIES:
        _add_placeholder(cassette, "<AUTH COOKIE>", response_cookies.get(name))
        _add_placeholder(cassette, "<AUTH COOKIE>", request_cookies.get(name))

        # Built once per cookie name instead of once per header.
        pattern = re.compile(rf"(?:^|[;,]\s*){re.escape(name)}=([^;,\s]+)")
        for header in set_cookie_headers:
            # finditer: a comma-folded header can carry the same cookie twice.
            for match in pattern.finditer(header):
                _add_placeholder(cassette, "<AUTH COOKIE>", match.group(1))


def _iter_interaction_texts(interaction):
    """Yield the text fragments of an interaction that are scanned for secrets.

    Order: request URI, response URL, then for the request and for the
    response in turn, each header value followed by the body text. List-valued
    headers are joined with newlines; other empty values are skipped.
    """
    data = interaction.data

    request_uri = data.get("request", {}).get("uri")
    if request_uri:
        yield request_uri

    response_url = data.get("response", {}).get("url")
    if response_url:
        yield response_url

    for side in ("request", "response"):
        section = data.get(side, {})

        for header_value in section.get("headers", {}).values():
            if isinstance(header_value, list):
                yield "\n".join(header_value)
                continue
            if header_value:
                yield header_value

        # A body is either a mapping holding the text under "string" or the
        # text itself.
        payload = section.get("body", "")
        if isinstance(payload, dict):
            payload = payload.get("string")
        if payload:
            yield payload


def _add_placeholder(cassette, placeholder, value):
    """Append a ``Placeholder`` for *value* unless it is unusable or already known.

    Nothing is added when *value* is empty, when it starts with ``<``, or when
    the cassette already holds the same placeholder/value pair.
    """
    usable = bool(value) and not value.startswith("<")
    if not usable:
        return

    for known in cassette.placeholders:
        if known.placeholder == placeholder and known.replace == value:
            return

    cassette.placeholders.append(Placeholder(placeholder=placeholder, replace=value))
30 changes: 0 additions & 30 deletions test/helpers.py

This file was deleted.

33 changes: 2 additions & 31 deletions tests_new/conftest.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
import os
from pathlib import Path
from test.cassette_sanitizer import sanitize_betamax_interaction
from urllib.parse import quote_plus

import pytest
import requests
from betamax import Betamax
from betamax.cassette.cassette import Placeholder
from betamax_serializers.pretty_json import PrettyJSONSerializer

from pycaching.geocaching import Geocaching

# Test credentials come from the environment; fixed placeholder strings are
# used when the variables are unset or empty.
USERNAME = os.environ.get("PYCACHING_TEST_USERNAME") or "USERNAMEPLACEHOLDER"
PASSWORD = os.environ.get("PYCACHING_TEST_PASSWORD") or "PASSWORDPLACEHOLDER"
# Optional; None when the variable is not set.
COOKIE = os.environ.get("PYCACHING_TEST_COOKIE")
# Names of cookies whose values must not end up in recorded cassettes.
CLASSIFIED_COOKIES = (
    "gspkauth",
    "__RequestVerificationToken",
    "jwt",  # NOTE: JWT token, contains user-related information: username, ids, OAuth token
)
# Cassettes live in a "cassettes" directory next to this file.
CASSETTE_DIR = Path(__file__).parent / "cassettes"


Expand All @@ -29,7 +24,7 @@ def betamax_config():
_betamax_config.cassette_library_dir = str(CASSETTE_DIR)
_betamax_config.define_cassette_placeholder("<USERNAME>", quote_plus(USERNAME))
_betamax_config.define_cassette_placeholder("<PASSWORD>", quote_plus(PASSWORD))
_betamax_config.before_record(callback=_sanitize_betamax_cookies)
_betamax_config.before_record(callback=sanitize_betamax_interaction)
_betamax_config.default_cassette_options["serialize_with"] = "prettyjson"


Expand Down Expand Up @@ -58,27 +53,3 @@ def geocaching_logged_in(betamax_session: requests.Session):
else:
gc.login(USERNAME, PASSWORD)
return gc


def _sanitize_betamax_cookies(interaction, cassette):
    """Register an ``<AUTH COOKIE>`` placeholder for each classified cookie value.

    Looks at the response cookie jar and at the request ``Cookie`` header of
    the response produced by ``interaction.as_response()``.
    """
    # TODO handle also request body occurrence of __RequestVerificationToken
    jar = interaction.as_response().cookies
    cookie_header = interaction.as_response().request.headers.get("Cookie") or ""

    sent = {}
    for pair in cookie_header.split("; "):
        key, found, cookie_value = pair.partition("=")
        if found:
            sent[key] = cookie_value

    # A set collapses values that appear in both the jar and the header.
    secrets_found = set()
    for cookie_name in CLASSIFIED_COOKIES:
        for candidate in (jar.get(cookie_name), sent.get(cookie_name)):
            if candidate:
                secrets_found.add(candidate)

    for secret in secrets_found:
        cassette.placeholders.append(Placeholder(placeholder="<AUTH COOKIE>", replace=secret))
137 changes: 137 additions & 0 deletions tests_new/test_cassette_sanitizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import json
import secrets
from test.cassette_sanitizer import sanitize_betamax_interaction
from types import SimpleNamespace
from uuid import uuid4

import pytest
import requests


@pytest.fixture(autouse=True)
def betamax_forgotten_recording_env_vars_fuse():
    """Override the autouse fuse; this unit test does not record network traffic.

    The body is intentionally empty, so the fixture does nothing in this module.
    NOTE(review): presumably this shadows a same-named autouse fixture defined
    in a conftest that is not visible here — confirm.
    """


def test_sanitize_betamax_interaction_collects_requested_placeholders():
    """Check that one synthetic interaction yields a placeholder for every sensitive value."""
    # Sensitive values planted in the fake interaction below; most are random
    # per run, and each one is looked up among the registered placeholders.
    auth_cookie = secrets.token_urlsafe(16)
    rotated_auth_cookie = secrets.token_urlsafe(16)
    request_token = secrets.token_urlsafe(18)
    response_token = secrets.token_urlsafe(20)
    viewstate = secrets.token_urlsafe(24)
    viewstate_generator = secrets.token_hex(4).upper()
    username = "user_" + secrets.token_hex(4)
    user_guid = str(uuid4())
    user_id = str(secrets.randbelow(90_000_000) + 10_000_000)
    user_code = "PR" + secrets.token_hex(3).upper()
    user_token = secrets.token_urlsafe(40)
    user_created_date = "2011-11-11T11:11:11"
    # These three only pad the gcUser object; nothing asserts on them.
    membership_level = "1"
    locale = "en-US"
    date_format = "dd.MM.yyyy"
    home_coords = "12.123456,12.123456"
    # Kept as JSON text so it can be compared with the "{...}" substring that
    # the sanitizer captures from the serialized page data.
    client_ip_coordinate = json.dumps({"latitude": 12.123456, "longitude": 12.123456})
    # JSON document embedded into the response body's __NEXT_DATA__ script tag.
    next_data = json.dumps(
        {
            "props": {
                "pageProps": {
                    "gcUser": {
                        "id": int(user_id),
                        "username": username,
                        "publicGuid": user_guid,
                        "referenceCode": user_code,
                        "dateCreated": user_created_date,
                        "locale": locale,
                        "membershipLevel": int(membership_level),
                        "dateFormat": date_format,
                        "clientIpCoordinate": json.loads(client_ip_coordinate),
                    }
                }
            }
        }
    )

    # Stand-in for a Betamax interaction: only the ``data`` mapping here and
    # the ``as_response`` attribute attached further down are provided.
    interaction = SimpleNamespace(
        data={
            "request": {
                "uri": "https://www.geocaching.com/seek/geocache.logbook?tkn=TOKEN",
                "headers": {
                    "Cookie": ["gspkauth={}; __RequestVerificationToken={}".format(auth_cookie, request_token)],
                },
                "body": {
                    "string": "__RequestVerificationToken={}&__VIEWSTATE={}".format(request_token, viewstate),
                },
            },
            "response": {
                "url": "https://www.geocaching.com/cache?saddr={}".format(home_coords),
                "headers": {
                    # A different value than the request cookie, so the raw
                    # Set-Cookie header is asserted on separately.
                    "Set-Cookie": [
                        "gspkauth={}; path=/; secure; HttpOnly".format(rotated_auth_cookie),
                    ],
                },
                "body": {
                    # Doubled braces are literal braces for str.format().
                    "string": """
<input type="hidden" name="__VIEWSTATEGENERATOR" value="{viewstate_generator}" />
<input type="hidden" name="__RequestVerificationToken" value="{response_token}" />
<script>
window['chromeSettings'] = {{"accountId": {user_id}, "username": "{username}",
"userPublicGuid": "{user_guid}",
"referenceCode": "{user_code}", "homeCoords": "{home_coords}"}};
window['userRef'] = '{user_code}';
userToken = '{user_token}';
</script>
<script id="__NEXT_DATA__" type="application/json">
{next_data}
</script>
""".format(
                        response_token=response_token,
                        viewstate_generator=viewstate_generator,
                        user_id=user_id,
                        username=username,
                        user_guid=user_guid,
                        user_code=user_code,
                        user_token=user_token,
                        home_coords=home_coords,
                        next_data=next_data,
                    ),
                },
            },
        }
    )

    # Stand-in for the object returned by ``interaction.as_response()``: a
    # cookie jar plus a request carrying a ``Cookie`` header.
    response = SimpleNamespace(
        cookies=requests.cookies.cookiejar_from_dict(
            {"gspkauth": auth_cookie, "__RequestVerificationToken": response_token}
        ),
        request=SimpleNamespace(
            headers={"Cookie": "gspkauth={}; __RequestVerificationToken={}".format(auth_cookie, request_token)}
        ),
    )
    interaction.as_response = lambda: response

    # Only a ``placeholders`` list is provided on the fake cassette.
    cassette = SimpleNamespace(placeholders=[])

    sanitize_betamax_interaction(interaction, cassette)

    # Compare as (placeholder, value) pairs so registration order is irrelevant.
    placeholders = {(item.placeholder, item.replace) for item in cassette.placeholders}
    # The chromeSettings object body must be captured wholesale, exactly once.
    chrome_settings_values = [value for placeholder, value in placeholders if placeholder == "<CHROME SETTINGS>"]
    assert len(chrome_settings_values) == 1
    assert user_id in chrome_settings_values[0]
    assert username in chrome_settings_values[0]
    assert user_guid in chrome_settings_values[0]
    assert user_code in chrome_settings_values[0]
    assert home_coords in chrome_settings_values[0]
    # Cookie values and both verification tokens share one placeholder name.
    assert ("<AUTH COOKIE>", auth_cookie) in placeholders
    assert ("<AUTH COOKIE>", rotated_auth_cookie) in placeholders
    assert ("<AUTH COOKIE>", request_token) in placeholders
    assert ("<AUTH COOKIE>", response_token) in placeholders
    assert ("<VIEWSTATE>", viewstate) in placeholders
    assert ("<VIEWSTATEGENERATOR>", viewstate_generator) in placeholders
    assert ("<USERNAME>", username) in placeholders
    assert ("<USER GUID>", user_guid) in placeholders
    assert ("<USER ID>", user_id) in placeholders
    assert ("<USER CODE>", user_code) in placeholders
    assert ("<USER TOKEN>", user_token) in placeholders
    assert ("<USER CREATED DATE>", user_created_date) in placeholders
    assert ("<CLIENT IP COORDINATE>", client_ip_coordinate) in placeholders
    assert ("<HOME COORDS>", home_coords) in placeholders
Loading