openml · PGijsbers · Jun 10, 2025 · Jun 10, 2025 · Jun 10, 2025 · Jun 10, 2025
diff --git a/.github/workflows/run_all_frameworks.yml b/.github/workflows/run_all_frameworks.yml
@@ -164,9 +164,18 @@ jobs:
         python -m pip install --upgrade pip
         python -m pip install -r requirements.txt
         python -m pip install "coverage[toml]"
+    - name: Check Things
+      run: |
+        ls -lah venv
+        ls -lah venv/bin
+        ls -lah "$GITHUB_WORKSPACE/venv/bin"  # absolute path of the checkout, without hard-coding the repository name
+        pwd
     - name: Run ${{ matrix.framework }} on ${{ matrix.task }}
       run: |
         source venv/bin/activate
+        ls /home/runner/work/automlbenchmark/automlbenchmark/venv/bin/coverage
+        /home/runner/work/automlbenchmark/automlbenchmark/venv/bin/coverage --help
+        coverage --help
         coverage run -m runbenchmark ${{ matrix.framework }} ${{ matrix.benchmark }} test -f 0 -t ${{ matrix.task }} -e
         coverage xml
       env:

diff --git a/amlb/utils/serialization.py b/amlb/utils/serialization.py
@@ -1,10 +1,12 @@
 import logging
+
 import math
 import os
 import pickle
 import re
 from typing import Optional
 
+
 from .core import Namespace as ns, json_dump, json_load
 from .process import profile
 
@@ -33,11 +35,10 @@ def _import_data_libraries():
     # the serializer to use when there's no specific serializer available.
     # mainly intended to serialize simple data structures like lists.
     # allowed=['pickle', 'json']
-    fallback_serializer="json",
-    # if numpy can use pickle to serialize ndarrays,
-    numpy_allow_pickle=True,
+    # OPTION REMOVED: the fallback serializer is always JSON. Pickle is no longer supported because loading a pickle can execute arbitrary code.
+    # fallback_serializer="json",
     # format used to serialize pandas dataframes/series between processes.
-    # allowed=['pickle', 'parquet', 'hdf', 'json']
+    # allowed=['parquet', 'json']
     pandas_serializer="parquet",
     # the compression format used when serializing pandas dataframes/series.
     # allowed=[None, 'infer', 'bz2', 'gzip']
@@ -163,8 +164,14 @@ def serialize_data(data, path, config: Optional[ns] = None):
     root, ext = os.path.splitext(path)
     np, pd, sp = _import_data_libraries()
     if np and isinstance(data, np.ndarray):
-        path = f"{root}.npy"
-        np.save(path, data, allow_pickle=config.numpy_allow_pickle)
+        if data.dtype == "object":
+            # Numpy cannot save object arrays without pickle
+            path = f"{root}.json"
+            data = data.squeeze().tolist()
+            json_dump(data, path, style="compact")
+        else:
+            path = f"{root}.npy"
+            np.save(path, data, allow_pickle=False)
     elif sp and isinstance(data, sp.spmatrix):
         # use custom extension to recognize sparsed matrices from file name.
         # .npz is automatically appended if missing, and can also potentially be used for numpy arrays.
@@ -177,9 +184,7 @@ def serialize_data(data, path, config: Optional[ns] = None):
             # for example, 'true' and 'false' are converted automatically to booleans, even for column names…
             data.rename(str, axis="columns", inplace=True)
         ser = config.pandas_serializer
-        if ser == "pickle":
-            data.to_pickle(path, compression=config.pandas_compression)
-        elif ser == "parquet":
+        if ser == "parquet":
             if isinstance(data, pd.Series):
                 data = pd.DataFrame({__series__: data})
             # parquet serialization doesn't support sparse dataframes
@@ -189,18 +194,15 @@ def serialize_data(data, path, config: Optional[ns] = None):
                 json_dump(dtypes, f"{path}.dtypes", style="compact")
                 data = unsparsify(data)
             data.to_parquet(path, compression=config.pandas_parquet_compression)
-        elif ser == "hdf":
-            data.to_hdf(path, os.path.basename(path), mode="w", format="table")
         elif ser == "json":
             data.to_json(path, compression=config.pandas_compression)
-    else:  # fallback serializer
-        if config.fallback_serializer == "json":
-            path = f"{root}.json"
-            json_dump(data, path, style="compact")
         else:
-            path = f"{root}.pkl"
-            with open(path, "wb") as f:
-                pickle.dump(data, f)
+            raise ValueError(
+                f"Invalid pandas serialization {ser} must be 'parquet' or 'json'"
+            )
+    else:  # fallback serializer
+        path = f"{root}.json"
+        json_dump(data, path, style="compact")
     return path
 
 
@@ -212,7 +214,7 @@ def deserialize_data(path, config: Optional[ns] = None):
     if ext == ".npy":
         if np is None:
             raise SerializationError(f"Numpy is required to deserialize {path}.")
-        return np.load(path, allow_pickle=config.numpy_allow_pickle)
+        return np.load(path)
     elif ext == ".npz":
         _, ext2 = os.path.splitext(base)
         if ext2 == ".spy":

diff --git a/examples/custom/extensions/GradientBoosting/exec.py b/examples/custom/extensions/GradientBoosting/exec.py
@@ -32,7 +32,6 @@ def run(dataset: Dataset, config: TaskConfig):
 
     save_predictions(
         dataset=dataset,
-        output_file=config.output_predictions_file,
         probabilities=probabilities,
         predictions=predictions,
         truth=y_test,

diff --git a/examples/custom/extensions/Stacking/exec.py b/examples/custom/extensions/Stacking/exec.py
@@ -133,7 +133,6 @@ def run(dataset, config):
     probabilities = estimator.predict_proba(X_test) if is_classification else None
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=y_test,
         probabilities=probabilities,

diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
@@ -183,7 +183,6 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
     shutil.rmtree(predictor.path, ignore_errors=True)
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         probabilities=probabilities,
         probabilities_labels=prob_labels,

diff --git a/frameworks/AutoGluon/exec_ts.py b/frameworks/AutoGluon/exec_ts.py
@@ -95,7 +95,6 @@ def run(dataset, config):
     get_reusable_executor().shutdown(wait=True)
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions_only,
         truth=truth_only,
         target_is_encoded=False,

diff --git a/frameworks/FEDOT/exec.py b/frameworks/FEDOT/exec.py
@@ -53,7 +53,6 @@ def run(dataset, config):
     save_artifacts(fedot, config)
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=dataset.test.y,
         probabilities=probabilities,

diff --git a/frameworks/FEDOT/exec_ts.py b/frameworks/FEDOT/exec_ts.py
@@ -104,7 +104,6 @@ def run(dataset, config):
 
     save_artifacts(fedot, config)
     return result(
-        output_file=config.output_predictions_file,
         predictions=all_series_predictions,
         truth=truth_only,
         target_is_encoded=False,

diff --git a/frameworks/GAMA/exec.py b/frameworks/GAMA/exec.py
@@ -123,7 +123,6 @@ def infer(data: Union[str, pd.DataFrame]):
         probabilities = gama_automl.predict_proba(X_test)
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         probabilities=probabilities,
         truth=y_test,

diff --git a/frameworks/H2OAutoML/exec.py b/frameworks/H2OAutoML/exec.py
@@ -203,7 +203,6 @@ def infer(path: str):
         save_artifacts(aml, dataset=dataset, config=config)
 
         return result(
-            output_file=config.output_predictions_file,
             predictions=preds.predictions,
             truth=preds.truth,
             probabilities=preds.probabilities,

diff --git a/frameworks/MLPlan/exec.py b/frameworks/MLPlan/exec.py
@@ -125,7 +125,6 @@ def run(dataset, config):
         target_encoded = False
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=truth,
         probabilities=probabilities,

diff --git a/frameworks/NaiveAutoML/exec.py b/frameworks/NaiveAutoML/exec.py
@@ -105,7 +105,6 @@ def infer(data: Union[str, pd.DataFrame]):
     save_artifacts(automl, config)
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         probabilities=probabilities,
         truth=dataset.test.y,

diff --git a/frameworks/RandomForest/exec.py b/frameworks/RandomForest/exec.py
@@ -132,7 +132,6 @@ def infer(data):
         log.info("Finished inference time measurements.")
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=y_test,
         probabilities=probabilities,

diff --git a/frameworks/SapientML/exec.py b/frameworks/SapientML/exec.py
@@ -79,7 +79,6 @@ def run(dataset, config):
         )
 
         return result(
-            output_file=config.output_predictions_file,
             predictions=predictions,
             truth=y_test,
             probabilities=probabilities,
@@ -88,7 +87,6 @@ def run(dataset, config):
         )
     else:
         return result(
-            output_file=config.output_predictions_file,
             predictions=predictions,
             truth=y_test,
             training_duration=training.duration,

diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
@@ -131,7 +131,6 @@ def infer(data):
     save_artifacts(tpot, config)
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=y_test,
         probabilities=probabilities,

diff --git a/frameworks/TunedRandomForest/exec.py b/frameworks/TunedRandomForest/exec.py
@@ -286,7 +286,6 @@ def infer(data):
         log.info("Finished inference time measurements.")
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=y_test,
         probabilities=probabilities,

diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
@@ -207,7 +207,6 @@ def sample_one_test_row(seed: int):
     save_artifacts(auto_sklearn, config)
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=dataset.test.y if use_pandas else dataset.test.y_enc,
         probabilities=probabilities,

diff --git a/frameworks/flaml/exec.py b/frameworks/flaml/exec.py
@@ -91,7 +91,6 @@ def infer(data: Union[str, pd.DataFrame]):
     log.info(f"Finished predict in {predict.duration}s.")
 
     return result(
-        output_file=config.output_predictions_file,
         probabilities=probabilities,
         predictions=predictions,
         truth=y_test,

diff --git a/frameworks/hyperoptsklearn/exec.py b/frameworks/hyperoptsklearn/exec.py
@@ -117,7 +117,6 @@ def default():
         probabilities = None
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=y_test,
         probabilities=probabilities,

diff --git a/frameworks/lightautoml/exec.py b/frameworks/lightautoml/exec.py
@@ -99,7 +99,6 @@ def infer(data: Union[str, pd.DataFrame]):
     save_artifacts(automl, config)
 
     return result(
-        output_file=config.output_predictions_file,
         probabilities_labels=probabilities_labels,
         probabilities=probabilities,
         predictions=predictions,

diff --git a/frameworks/mljarsupervised/exec.py b/frameworks/mljarsupervised/exec.py
@@ -105,7 +105,6 @@ def infer(data: Union[str, pd.DataFrame]):
         shutil.rmtree(results_path, ignore_errors=True)
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=y_test,
         probabilities=probabilities,

diff --git a/frameworks/oboe/exec.py b/frameworks/oboe/exec.py
@@ -123,7 +123,6 @@ def aml_models():
         probabilities = None
 
     return result(
-        output_file=config.output_predictions_file,
         predictions=predictions,
         truth=y_test,
         probabilities=probabilities,

diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
@@ -27,7 +27,6 @@ class FrameworkError(Exception):
 
 
 def result(
-    output_file=None,
     predictions=None,
     truth=None,
     probabilities=None,
@@ -94,6 +93,7 @@ def load_data(name, path, **_):
                     path = os.path.join(config.result_dir, ".".join([name, "data"]))
                     res[name] = serialize_data(arr, path, config=ser_config)
     except BaseException as e:
+        log.error("Integration script failed with uncaught exception:")
         log.exception(e)
         res = dict(error_message=str(e), models_count=0)
     finally:
@@ -107,6 +107,8 @@ def load_data(name, path, **_):
         )
         json_dump(inference_measurements, inference_file, style="compact")
         res["others"]["inference_times"] = str(inference_file)
+
+    res.setdefault("output_file", config.output_predictions_file)
     json_dump(res, config.result_file, style="compact")
 
 

diff --git a/frameworks/shared/setup.sh b/frameworks/shared/setup.sh
@@ -44,13 +44,19 @@ PIP() {
   $pip_exec "$@"
 }
 
-#if [[ -x "$(command -v $PY_VENV/bin/activate)" ]]; then
-#    $PY_ROOT/activate
-#fi
-
-#echo "PY=$(command -v PY)"
-#echo "PIP=$(command -v PIP)"
 echo "PY=$py_exec"
 echo "PIP=$pip_exec"
 
-PIP install --no-cache-dir -r $SHARED_DIR/requirements.txt
+REQ_FILE="$SHARED_DIR/requirements.txt"
+# Read whole lines (on fd 3, so stdin stays free for PY/PIP): specifiers with spaces or markers must not be word-split or globbed.
+while read -r line <&3; do
+    # In a line like "numpy==1.12.0" or "pandas[perf] >= 2; python_version > '3.8'", pkg is the bare
+    # distribution name (numpy, pandas) and line is the whole requirement passed to pip.
+    pkg=$(echo "$line" | sed -E 's/[][<>=~!;[:space:]].*$//')
+    # The import name can differ from the distribution name (e.g. scikit-learn vs sklearn), so also ask pip.
+    if ! PY -c "import $pkg" &> /dev/null && ! PIP show "$pkg" &> /dev/null; then
+        echo "$pkg not found. Installing from requirements.txt..."
+        PIP install --no-cache-dir "$line"
+    else
+        echo "$pkg is already installed by the framework, using that instead."
+    fi
+done 3< <(grep -vE '^\s*#' "$REQ_FILE" | grep -vE '^\s*$')