From cd09e6cf9229c1846fdf54fb440d8a00fbde35bf Mon Sep 17 00:00:00 2001
From: q1uf3ng
Date: Fri, 8 May 2026 13:58:47 +0800
Subject: [PATCH] fix: add path traversal validation to save/load datapoints
 endpoints

User-supplied paths in _save_datapoints and _load_datapoints are passed
directly to file I/O without validation. An attacker can use path
traversal sequences to read or write arbitrary files on the server.

Add _validate_data_path() that resolves the path and ensures it stays
within the configured data_dir. When no data_dir is configured, any path
containing a '..' component is rejected instead.
---
 lit_nlp/app.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/lit_nlp/app.py b/lit_nlp/app.py
index 7d8afba5..f8e07090 100644
--- a/lit_nlp/app.py
+++ b/lit_nlp/app.py
@@ -215,6 +215,40 @@ def _reconstitute_inputs(
     )
     return [index[ex] if isinstance(ex, str) else ex for ex in inputs]
 
+  def _validate_data_path(self, path: str) -> str:
+    """Resolves a user-supplied path and rejects path traversal.
+
+    Args:
+      path: user-supplied file path.
+
+    Returns:
+      The canonical path, with symlinks resolved by os.path.realpath().
+
+    Raises:
+      ValueError: if data_dir is set and the path resolves outside of it, or
+        if data_dir is unset and the path contains a '..' component.
+    """
+    # NOTE(review): relative paths resolve against the process CWD, not
+    # data_dir; confirm that this is intended.
+    resolved = os.path.realpath(path)
+    if self._data_dir:
+      base = os.path.realpath(self._data_dir)
+      try:
+        # commonpath() compares whole components, so this also works when
+        # data_dir is the filesystem root and base already ends in os.sep.
+        inside = os.path.commonpath([base, resolved]) == base
+      except ValueError:  # e.g. paths on different Windows drives.
+        inside = False
+      if not inside:
+        raise ValueError(
+            f'Path must be within data_dir ({self._data_dir})')
+    elif '..' in path.replace('\\', '/').split('/'):
+      # Check the raw components: normpath() would fold '/../etc' into '/etc'
+      # before the check. With no data_dir there is nothing to confine to, so
+      # absolute paths are still accepted here.
+      raise ValueError('Path traversal is not allowed')
+    return resolved
+
   def _save_datapoints(
       self,
       data,
@@ -231,6 +265,7 @@ def _save_datapoints(
     if self._demo_mode:
       logging.warning('Attempted to save datapoints in demo mode.')
       return None
+    path = self._validate_data_path(path)
     return self._datasets[dataset_name].save(data['inputs'], path)
 
   def _load_datapoints(
@@ -249,6 +284,7 @@ def _load_datapoints(
     if self._demo_mode:
       logging.warning('Attempted to load datapoints in demo mode.')
       return None
+    path = self._validate_data_path(path)
     dataset = self._datasets[dataset_name].load(path)
     return dataset.indexed_examples
 