From 7990cf07d459d6a39e97d631fde7c2de63e2c226 Mon Sep 17 00:00:00 2001 From: matthewholman Date: Sun, 17 May 2026 15:02:33 -0400 Subject: [PATCH] Fix two bugs that broke orbitfit(weight_data=True). The weight_data=True code path has been broken since it was added. Two distinct bugs, hit in sequence: 1. NameError on `astcat_column_present`. `_orbitfit` defined the flag as `g_column_present = "astCat" in column_names` but then referenced `astcat_column_present` two lines later inside the debias and weighting branches. The path was reached the moment `weight_data=True` was passed, so the feature crashed before doing any fitting. Fix: rename the definition to match the usage sites (`astcat_column_present`). 2. Arcsecond vs radian unit mismatch. `data_weight_Veres2017` returns the astrometric uncertainty in ARCSECONDS (per its docstring). The call site assigned the return value directly to `Observation.ra_unc` and `dec_unc`, which are stored in RADIANS. That made each observation's weight ~200000^2 = 4e10x too loose, inflating the fit covariance to absurd values (sigma_pos ~ 25 million km on a 3 AU object) and driving chi-square to zero. Fix: convert arcsec -> radian at the assignment. Effect on layup's reported position uncertainty for the three real MPC objects in the test fixture (mainbelt asteroids, ~2-3 AU): Object weight_data=False weight_data=True (after fix) ------ ----------------- ---------------------------- 119839 269 km 123 km 742428 340 km 160 km 609631 306 km 80 km With the fix, weight_data=True roughly halves the reported sigma_pos (closer to JPL Horizons' published per-element sigmas for these objects), and chi-square/dof moves from artificially low (0.05-0.23, suggesting the default 1-arcsec uncertainty overweights the data) to a more honest 0.27-0.53. tests/layup/test_weight_data.py adds two regression tests: - test_orbitfit_weight_data_true_runs_without_crashing: catches the NameError-type regression on the wd=True path. - test_orbitfit_weight_data_true_gives_sensible_uncertainty: asserts sigma_pos < 10000 km for the 27-year mainbelt arc -- well above the actual ~120 km but trivially below the 25-million-km failure mode of the units bug. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/layup/orbitfit.py | 12 +++-- tests/layup/test_weight_data.py | 81 +++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 tests/layup/test_weight_data.py diff --git a/src/layup/orbitfit.py b/src/layup/orbitfit.py index afab1db..c09b454 100644 --- a/src/layup/orbitfit.py +++ b/src/layup/orbitfit.py @@ -516,7 +516,7 @@ def _orbitfit( # Check if certain columns are present in the data column_names = data.dtype.names - g_column_present = "astCat" in column_names + astcat_column_present = "astCat" in column_names program_column_present = "program" in column_names position_rates_columns_present = all(col in column_names for col in ["raRate", "decRate"]) @@ -566,15 +566,19 @@ def _orbitfit( ) if weight_data: - data_weight = data_weight_Veres2017( + # data_weight_Veres2017 returns the astrometric uncertainty in + # ARCSECONDS (per its docstring), but Observation.ra_unc / + # dec_unc are stored in RADIANS. Convert at the assignment. + data_weight_arcsec = data_weight_Veres2017( obsCode=d["stn"], jd_tdb=convert_tdb_date_to_julian_date(d["obsTime"], cache_dir), catalog=d["astCat"] if astcat_column_present else None, program=d["program"] if program_column_present else None, ) + data_weight_rad = data_weight_arcsec * np.pi / (180.0 * 3600.0) - o.ra_unc = data_weight - o.dec_unc = data_weight + o.ra_unc = data_weight_rad + o.dec_unc = data_weight_rad observations.append(o) diff --git a/tests/layup/test_weight_data.py b/tests/layup/test_weight_data.py new file mode 100644 index 0000000..6c892c7 --- /dev/null +++ b/tests/layup/test_weight_data.py @@ -0,0 +1,81 @@ +"""Regression tests for `orbitfit(weight_data=True)`. + +Guards two bugs that previously broke the `weight_data=True` code path: + + 1. `g_column_present` was defined but `astcat_column_present` was + referenced inside _orbitfit, raising NameError at the first + data_weight_Veres2017 call -- failure mode: hard crash before + any fitting happened. + + 2. `data_weight_Veres2017` returns astrometric uncertainty in + ARCSECONDS (per its docstring), but the call site assigned the + return value directly to Observation.ra_unc / dec_unc which are + stored in RADIANS -- failure mode: weights were ~206000x too + loose, fit covariance inflated to absurd values (sigma_pos + ~25 million km on a 3 AU object), chi-square -> 0. + +The fix is a one-line rename plus an explicit arcsec->radian +conversion at the call site; this file guards against either kind +of regression. +""" + +from __future__ import annotations + +import os + +import numpy as np +import pytest + +from layup.orbitfit import orbitfit +from layup.utilities.data_processing_utilities import parse_cov +from layup.utilities.data_utilities_for_tests import get_test_filepath +from layup.utilities.file_io.CSVReader import CSVDataReader + +CACHE = os.path.expanduser("~/Library/Caches/layup") +EPHEM_PLANETS = os.path.join(CACHE, "linux_p1550p2650.440") +EPHEM_SMALLBODIES = os.path.join(CACHE, "sb441-n16.bsp") +EPHEM_AVAILABLE = os.path.exists(EPHEM_PLANETS) and os.path.exists(EPHEM_SMALLBODIES) +AU_KM = 149597870.7 + +pytestmark = pytest.mark.skipif( + not EPHEM_AVAILABLE, + reason=f"ASSIST ephemeris missing at {CACHE}; skipping weight_data test.", +) + + +def _fit_one(prov_id: str, weight_data: bool): + reader = CSVDataReader( + get_test_filepath("4_random_mpc_ADES_provIDs_no_sats.csv"), + "csv", + primary_id_column_name="provID", + ) + data = reader.read_rows() + data = data[data["provID"] == prov_id] + assert len(data) > 0, f"test fixture missing provID {prov_id}" + return orbitfit(data, cache_dir=CACHE, weight_data=weight_data)[0] + + +def test_orbitfit_weight_data_true_runs_without_crashing(): + """orbitfit(weight_data=True) on real MPC data without an astCat column + must run to completion -- previously raised NameError on the missing + astcat_column_present variable.""" + row = _fit_one("119839", weight_data=True) + assert "flag" in row.dtype.names + + +def test_orbitfit_weight_data_true_gives_sensible_uncertainty(): + """orbitfit(weight_data=True) must apply weights in the right units. + Previously the returned arcsecond value was assigned directly to + Observation.ra_unc (which is in radians), inflating sigma_pos by + ~5 orders of magnitude. An explicit cap of 10000 km on the position + uncertainty for our well-constrained 27-year mainbelt arc catches + that regression: actual sigma_pos with the fix is O(100 km).""" + row = _fit_one("119839", weight_data=True) + assert row["flag"] == 0, f"weight_data=True fit did not converge (flag={row['flag']})" + cov = parse_cov(row) + sigma_pos_km = float(np.sqrt(cov[0, 0] + cov[1, 1] + cov[2, 2])) * AU_KM + assert sigma_pos_km < 10000.0, ( + f"sigma_pos={sigma_pos_km:.1f} km is unreasonably large for a " + f"587-obs 27-year arc; likely a units regression in the " + f"weight assignment (arcsec vs radian)." + )