Skip to content

Commit 64b2a5b

Browse files
committed
Implement automatic pagination for EIA data fetching
1 parent 7f6b156 commit 64b2a5b

2 files changed

Lines changed: 70 additions & 13 deletions

File tree

src/open_data_pvnet/scripts/fetch_eia_data.py

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -77,25 +77,45 @@ def get_data(
77 77
else:
78 78
params[f"facets[{key}][]"] = value
79 79

80+
all_data = []
81+
80 82
try:
81-
logger.info(f"Fetching data from {url}...")
82-
response = requests.get(url, params=params)
83-
response.raise_for_status()
83+
current_offset = offset
84+
while True:
85+
# Create a fresh copy of params for each request to avoid mutating history
86+
request_params = params.copy()
87+
request_params["offset"] = current_offset
88+
89+
logger.info(f"Fetching data from {url}, offset={current_offset}...")
90+
response = requests.get(url, params=request_params)
91+
response.raise_for_status()
92+
93+
payload = response.json()
94+
if "response" in payload and "data" in payload["response"]:
95+
data = payload["response"]["data"]
96+
if not data:
97+
logger.info("No more data returned from API.")
98+
break
99+
100+
all_data.extend(data)
101+
102+
if len(data) < length:
103+
break
104+
105+
current_offset += length
106+
else:
107+
logger.error(f"Unexpected API response format: {payload.keys()}")
108+
break
84 109

85-
payload = response.json()
86-
if "response" in payload and "data" in payload["response"]:
87-
data = payload["response"]["data"]
88-
if not data:
89-
logger.warning("No data returned from API.")
90-
return None
91-
return pd.DataFrame(data)
92-
else:
93-
logger.error(f"Unexpected API response format: {payload.keys()}")
110+
if not all_data:
111+
logger.warning("No data retrieved.")
94 112
return None
113+
114+
return pd.DataFrame(all_data)
95 115

96 116
except requests.exceptions.RequestException as e:
97 117
logger.error(f"Request failed: {e}")
98-
if response is not None:
118+
if 'response' in locals() and response is not None:
99 119
logger.error(f"Response: {response.text}")
100 120
return None
101 121

tests/test_eia.py

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,3 +107,40 @@ def test_get_dataset_success(mock_response):
107 107
assert "datetime_gmt" in ds.coords or "datetime_gmt" in ds.indexes
108 108
assert "value" in ds.data_vars
109 109
assert len(ds.datetime_gmt) == 2
110+
111+
def test_get_data_pagination():
112+
page1 = {
113+
"response": {
114+
"data": [
115+
{"period": "2023-01-01T00", "value": 100},
116+
{"period": "2023-01-01T01", "value": 150},
117+
]
118+
}
119+
}
120+
page2 = {
121+
"response": {
122+
"data": [
123+
{"period": "2023-01-01T02", "value": 200},
124+
]
125+
}
126+
}
127+
128+
mock_resp1 = Mock()
129+
mock_resp1.json.return_value = page1
130+
mock_resp1.raise_for_status.return_value = None
131+
132+
mock_resp2 = Mock()
133+
mock_resp2.json.return_value = page2
134+
mock_resp2.raise_for_status.return_value = None
135+
136+
with patch("requests.get", side_effect=[mock_resp1, mock_resp2]) as mock_get:
137+
eia = EIAData(api_key="test_key")
138+
139+
df = eia.get_data("route", "start", "end", length=2)
140+
141+
assert len(df) == 3
142+
assert mock_get.call_count == 2
143+
144+
call_args_list = mock_get.call_args_list
145+
assert call_args_list[0][1]["params"]["offset"] == 0
146+
assert call_args_list[1][1]["params"]["offset"] == 2

0 commit comments

Comments
 (0)