From b0a8b5c7e90b266cb33a466a2992798abd1a700d Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Sun, 3 May 2026 14:04:54 -0500
Subject: [PATCH 1/3] Fix get_nearest_continuous: accept scalar targets and
 missing time column
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The docstring says ``targets`` accepts "anything ``pandas.to_datetime``
consumes", which includes a bare string or ``pd.Timestamp``. But
``pd.to_datetime("2024-01-01T00:00:00Z", utc=True)`` returns a scalar
``Timestamp``, and ``pd.DatetimeIndex(scalar)`` raises ``TypeError`` —
so single-value cases crashed despite the documented contract.

Wrap a scalar result in a one-element ``DatetimeIndex`` so any
``pandas.to_datetime``-consumable input works.

Also: when the user passes ``properties`` that excludes ``time``, the
helper used to crash with ``KeyError`` deep inside ``df.assign``. Detect
the missing column up front and raise a ``ValueError`` pointing at the
likely cause.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/waterdata/nearest.py | 20 +++++++++++-
 tests/waterdata_nearest_test.py    | 51 ++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/dataretrieval/waterdata/nearest.py b/dataretrieval/waterdata/nearest.py
index 68f123ef..e2edea7a 100644
--- a/dataretrieval/waterdata/nearest.py
+++ b/dataretrieval/waterdata/nearest.py
@@ -139,7 +139,7 @@ def get_nearest_continuous(
         ... )
     """
     _check_nearest_kwargs(kwargs, on_tie)
-    target_index = pd.DatetimeIndex(pd.to_datetime(targets, utc=True))
+    target_index = _coerce_targets(targets)
     window_td = pd.Timedelta(window)
 
     if len(target_index) == 0:
@@ -153,6 +153,11 @@ def get_nearest_continuous(
         filter_lang="cql-text",
         **kwargs,
     )
+    if "time" not in df.columns:
+        raise ValueError(
+            "get_nearest_continuous requires a 'time' column in the response; "
+            "if a `properties` kwarg was passed, include 'time' in it"
+        )
     if df.empty:
         return _empty_nearest_result(df), md
 
@@ -174,6 +179,19 @@ def get_nearest_continuous(
     return pd.DataFrame(selected).reset_index(drop=True), md
 
 
+def _coerce_targets(targets: Any) -> pd.DatetimeIndex:
+    """Accept anything ``pandas.to_datetime`` consumes, including a single value.
+
+    A bare scalar (string, ``Timestamp``, ``datetime``, …) becomes a
+    one-element ``DatetimeIndex``; an iterable round-trips through
+    ``pd.to_datetime`` directly.
+    """
+    parsed = pd.to_datetime(targets, utc=True)
+    if isinstance(parsed, pd.DatetimeIndex):
+        return parsed
+    return pd.DatetimeIndex([parsed])
+
+
 def _check_nearest_kwargs(kwargs: dict[str, Any], on_tie: OnTie) -> None:
     """Reject kwargs the helper owns; validate ``on_tie``."""
     for forbidden in ("time", "filter", "filter_lang"):
diff --git a/tests/waterdata_nearest_test.py b/tests/waterdata_nearest_test.py
index 4dc0ab9d..3f988a6b 100644
--- a/tests/waterdata_nearest_test.py
+++ b/tests/waterdata_nearest_test.py
@@ -265,3 +265,54 @@ def test_forwards_kwargs_to_get_continuous(patch_get_continuous):
     _, kwargs = patch_get_continuous.call_args
     assert kwargs["statistic_id"] == "00011"
     assert kwargs["approval_status"] == "Approved"
+
+
+def test_accepts_single_string_target(patch_get_continuous):
+    """A bare scalar target must round-trip through pd.to_datetime.
+
+    Regression: previously `pd.DatetimeIndex(pd.to_datetime("...", utc=True))`
+    raised TypeError because pd.to_datetime returns a scalar Timestamp for a
+    single-string input.
+    """
+    patch_get_continuous.return_value = (
+        _fake_df([{"time": "2023-06-15T10:30:00Z", "value": 22.4}]),
+        mock.Mock(),
+    )
+    result, _ = get_nearest_continuous(
+        "2023-06-15T10:30:31Z", monitoring_location_id="USGS-02238500"
+    )
+    assert len(result) == 1
+    assert result["target_time"].iloc[0] == pd.Timestamp(
+        "2023-06-15T10:30:31Z", tz="UTC"
+    )
+
+
+def test_accepts_single_timestamp_target(patch_get_continuous):
+    """A single ``pd.Timestamp`` target also round-trips."""
+    patch_get_continuous.return_value = (
+        _fake_df([{"time": "2023-06-15T10:30:00Z", "value": 22.4}]),
+        mock.Mock(),
+    )
+    target = pd.Timestamp("2023-06-15T10:30:31Z", tz="UTC")
+    result, _ = get_nearest_continuous(target, monitoring_location_id="USGS-02238500")
+    assert len(result) == 1
+
+
+def test_missing_time_column_raises_helpful_error(patch_get_continuous):
+    """If the response has no 'time' column (e.g. user passed `properties`
+    that excluded it), raise ValueError instead of crashing with KeyError.
+    """
+    df_no_time = pd.DataFrame(
+        {
+            "value": [22.4],
+            "monitoring_location_id": ["USGS-02238500"],
+        }
+    )
+    patch_get_continuous.return_value = (df_no_time, mock.Mock())
+
+    with pytest.raises(ValueError, match="'time' column"):
+        get_nearest_continuous(
+            ["2023-06-15T10:30:31Z"],
+            monitoring_location_id="USGS-02238500",
+            properties=["value", "monitoring_location_id"],
+        )

From 2df9743222ad3c92d1b9217010285a67eef313f7 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Mon, 4 May 2026 10:09:57 -0500
Subject: [PATCH 2/3] Preserve list-like target inputs and avoid double-tz in
 test

Per copilot review on PR #251:

- _coerce_targets: detect non-DatetimeIndex iterables (Series, ndarray)
  via pd.api.types.is_scalar so the elements are preserved instead of
  being wrapped in a single-element list. Add a regression test passing
  a pd.Series of two timestamps and assert both are processed.
- Tests: drop the redundant tz='UTC' on pd.Timestamp inputs that already
  carry a Z suffix; pandas 2.x raises on double timezone specification.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/waterdata/nearest.py |  8 +++++---
 tests/waterdata_nearest_test.py    | 22 ++++++++++++++++++----
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/dataretrieval/waterdata/nearest.py b/dataretrieval/waterdata/nearest.py
index e2edea7a..ecb70c91 100644
--- a/dataretrieval/waterdata/nearest.py
+++ b/dataretrieval/waterdata/nearest.py
@@ -183,13 +183,15 @@ def _coerce_targets(targets: Any) -> pd.DatetimeIndex:
     """Accept anything ``pandas.to_datetime`` consumes, including a single value.
 
     A bare scalar (string, ``Timestamp``, ``datetime``, …) becomes a
-    one-element ``DatetimeIndex``; an iterable round-trips through
-    ``pd.to_datetime`` directly.
+    one-element ``DatetimeIndex``; an iterable (list, ``Series``, ``ndarray``)
+    is wrapped directly so its elements are preserved.
     """
     parsed = pd.to_datetime(targets, utc=True)
     if isinstance(parsed, pd.DatetimeIndex):
         return parsed
-    return pd.DatetimeIndex([parsed])
+    if pd.api.types.is_scalar(parsed):
+        return pd.DatetimeIndex([parsed])
+    return pd.DatetimeIndex(parsed)
 
 
 def _check_nearest_kwargs(kwargs: dict[str, Any], on_tie: OnTie) -> None:
diff --git a/tests/waterdata_nearest_test.py b/tests/waterdata_nearest_test.py
index 3f988a6b..64deeccd 100644
--- a/tests/waterdata_nearest_test.py
+++ b/tests/waterdata_nearest_test.py
@@ -282,9 +282,7 @@ def test_accepts_single_string_target(patch_get_continuous):
         "2023-06-15T10:30:31Z", monitoring_location_id="USGS-02238500"
     )
     assert len(result) == 1
-    assert result["target_time"].iloc[0] == pd.Timestamp(
-        "2023-06-15T10:30:31Z", tz="UTC"
-    )
+    assert result["target_time"].iloc[0] == pd.Timestamp("2023-06-15T10:30:31Z")
 
 
 def test_accepts_single_timestamp_target(patch_get_continuous):
@@ -293,11 +291,27 @@ def test_accepts_single_timestamp_target(patch_get_continuous):
         _fake_df([{"time": "2023-06-15T10:30:00Z", "value": 22.4}]),
         mock.Mock(),
     )
-    target = pd.Timestamp("2023-06-15T10:30:31Z", tz="UTC")
+    target = pd.Timestamp("2023-06-15T10:30:31Z")
     result, _ = get_nearest_continuous(target, monitoring_location_id="USGS-02238500")
     assert len(result) == 1
 
 
+def test_accepts_pandas_series_targets(patch_get_continuous):
+    """A ``pd.Series`` of timestamps preserves all elements (not just the first)."""
+    patch_get_continuous.return_value = (
+        _fake_df(
+            [
+                {"time": "2023-06-15T10:30:00Z", "value": 22.4},
+                {"time": "2023-06-16T10:30:00Z", "value": 22.5},
+            ]
+        ),
+        mock.Mock(),
+    )
+    targets = pd.Series(["2023-06-15T10:30:31Z", "2023-06-16T10:30:31Z"])
+    result, _ = get_nearest_continuous(targets, monitoring_location_id="USGS-02238500")
+    assert len(result) == 2
+
+
 def test_missing_time_column_raises_helpful_error(patch_get_continuous):
     """If the response has no 'time' column (e.g. user passed `properties`
     that excluded it), raise ValueError instead of crashing with KeyError.

From 001b051accd289858dd2a4c4f987f12fc5ace7d6 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 25 Jun 2026 09:21:59 -0500
Subject: [PATCH 3/3] refactor(waterdata): collapse _coerce_targets to a single
 scalar guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `isinstance(parsed, pd.DatetimeIndex)` early-return was redundant:
`pd.DatetimeIndex(parsed)` returns an equivalent index for an already-
parsed DatetimeIndex, so the three branches reduce to one scalar guard
that wraps a bare value in a list before constructing the index. Behavior
is unchanged for every input kind (DatetimeIndex, scalar string/Timestamp,
list, Series, ndarray, empty) — the existing nearest tests cover them all.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Sjb14HkwuCydKSKMsaXsgd
---
 dataretrieval/waterdata/nearest.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dataretrieval/waterdata/nearest.py b/dataretrieval/waterdata/nearest.py
index ecb70c91..edf1a912 100644
--- a/dataretrieval/waterdata/nearest.py
+++ b/dataretrieval/waterdata/nearest.py
@@ -187,10 +187,8 @@ def _coerce_targets(targets: Any) -> pd.DatetimeIndex:
     is wrapped directly so its elements are preserved.
     """
     parsed = pd.to_datetime(targets, utc=True)
-    if isinstance(parsed, pd.DatetimeIndex):
-        return parsed
     if pd.api.types.is_scalar(parsed):
-        return pd.DatetimeIndex([parsed])
+        parsed = [parsed]
     return pd.DatetimeIndex(parsed)