Skip to content
67 changes: 53 additions & 14 deletions amlb/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
from .datautils import read_csv
from .resources import get as rget, config as rconfig, output_dirs as routput_dirs
from .results import ErrorResult, Scoreboard, TaskResult
from .utils import Namespace as ns, OSMonitoring, as_list, datetime_iso, flatten, json_dump, lazy_property, profile, repr_def, \
run_cmd, run_script, signal_handler, str2bool, str_sanitize, system_cores, system_memory_mb, system_volume_mb, touch

from .utils import Namespace as ns, OSMonitoring, as_list, datetime_iso, flatten, \
json_dump, lazy_property, profile, repr_def, \
run_cmd, run_script, signal_handler, str2bool, str_sanitize, system_cores, \
system_memory_mb, system_volume_mb, touch, Namespace

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -371,9 +372,33 @@ def _is_task_enabled(task_def):

class TaskConfig:

def __init__(self, name, fold, metrics, seed,
def __init__(self, *, name, fold, seed,
max_runtime_seconds, cores, max_mem_size_mb, min_vol_size_mb,
input_dir, output_dir):
input_dir, output_dir,
metrics: Union[list[str], str, None] = None,
optimization_metrics: Union[list[str], str, None] = None,
evaluation_metrics: Union[list[str], str, None] = None,
):

if metrics:
log.warning(
"WARNING: The `metric` field of the task definition is deprecated"
" and will not work in the future. Please specify the metric(s) to "
"optimize for with `optimization_metrics` and any additional metric(s) "
"used only for evaluation in `evaluation_metrics`."
)
if optimization_metrics:
raise ValueError(
"Detected both `metric` and `optimization_metrics` for task "
f"'{name}'. Aborting because desired setup is unclear."
"Please only use `optimization_metrics`."
)
optimization_metrics = as_list(metrics)[:1]
evaluation_metrics = as_list(metrics)[1:]

self.optimization_metrics = optimization_metrics or []
self._evaluation_metrics = evaluation_metrics or []

self.framework = None
self.framework_params = None
self.framework_version = None
Expand All @@ -391,16 +416,28 @@ def __init__(self, name, fold, metrics, seed,
self.output_predictions_file = os.path.join(output_dir, "predictions.csv")
self.ext = ns() # used if frameworks require extra config points

@property
def evaluation_metrics(self) -> list[str]:
    """Return every metric to evaluate, optimization metrics first.

    The result is the optimization metrics followed by the evaluation-only
    metrics, with duplicates removed (first occurrence wins) so that a
    metric listed in both fields is not reported twice. A new list is
    returned on every access.
    """
    def _listify(value):
        # The fields may hold a list, a single metric name, or nothing.
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    combined = _listify(self.optimization_metrics) + _listify(self._evaluation_metrics)
    # dict.fromkeys keeps insertion order, giving an order-preserving de-dup.
    return list(dict.fromkeys(combined))

def load_default_metrics(self, *, dataset_type: str):
    """Populate the metric fields from the configured defaults.

    Looks up `benchmarks.metrics[dataset_type]` in the resource config;
    the first entry becomes the optimization metric and the remaining
    entries become evaluation-only metrics.
    """
    defaults = as_list(rconfig().benchmarks.metrics[dataset_type])
    primary, extras = defaults[:1], defaults[1:]
    self.optimization_metrics = primary
    self._evaluation_metrics = extras

def __setattr__(self, name, value):
    """Set an attribute, keeping the derived job timeout in sync.

    Whenever `max_runtime_seconds` is assigned, `job_timeout_seconds` is
    recomputed as the smaller of twice the runtime and the runtime plus
    the configured `benchmarks.overhead_time_seconds`.
    """
    if name == 'max_runtime_seconds':
        self.job_timeout_seconds = min(
            value * 2,
            value + rconfig().benchmarks.overhead_time_seconds,
        )
    super().__setattr__(name, value)

def __json__(self):
    """Return the instance attributes as a dict, plus `evaluation_metrics`.

    `evaluation_metrics` is a computed property and therefore absent from
    `__dict__`; it is added explicitly to the returned mapping.
    """
    # Work on a shallow copy: writing the computed key into the live
    # `__dict__` would leave a stale snapshot on the instance itself.
    d = dict(self.__dict__)
    d["evaluation_metrics"] = self.evaluation_metrics
    return d
Comment thread
PGijsbers marked this conversation as resolved.
Outdated

def __repr__(self):
    """Return the representation produced by `repr_def` for this object."""
    text = repr_def(self)
    return text
Expand Down Expand Up @@ -458,10 +495,13 @@ def __init__(self, benchmark: Benchmark, task_def, fold):
self.benchmark = benchmark
self._task_def = task_def
self.fold = fold

self.task_config = TaskConfig(
name=task_def.name,
fold=fold,
metrics=task_def.metric,
optimization_metrics=Namespace.get(task_def, "optimization_metrics"),
evaluation_metrics=Namespace.get(task_def, "evaluation_metrics"),
seed=rget().seed(fold),
max_runtime_seconds=task_def.max_runtime_seconds,
cores=task_def.cores,
Expand Down Expand Up @@ -542,9 +582,8 @@ def run(self):
task_config.output_predictions_file = results._predictions_file
task_config.output_metadata_file = results._metadata_file
touch(os.path.dirname(task_config.output_predictions_file), as_dir=True)
if task_config.metrics is None:
task_config.metrics = as_list(rconfig().benchmarks.metrics[self._dataset.type.name])
task_config.metric = task_config.metrics[0]
if not task_config.optimization_metrics:
task_config.load_default_metrics(dataset_type=self._dataset.type.name)

result = meta_result = None
try:
Expand Down
28 changes: 16 additions & 12 deletions amlb/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,8 +426,11 @@ def compute_score(self, result=None, meta_result=None):
seed=metadata.seed,
app_version=rget().app_version,
utc=datetime_iso(),
metric=metadata.metric,
duration=nan
evaluation_metrics=metadata.evaluation_metrics,
optimization_metrics=metadata.optimization_metrics,
duration=nan,
result_metrics=[],
result=[],
)
required_meta_res = ['training_duration', 'predict_duration', 'models_count']
for m in required_meta_res:
Expand All @@ -443,21 +446,22 @@ def do_score(m):
return score

def set_score(score):
    """Append a score to `entry.result` / `entry.result_metrics`.

    Scores are normalised so that higher is better: when the metric is
    explicitly flagged `higher_is_better is False`, the value is negated
    and the metric name is prefixed with `neg_`.
    """
    # Only an explicit False flips the sign: for an unknown metric
    # `higher_is_better` is None and the score must be left unchanged.
    negate = score.higher_is_better is False
    metric = f"neg_{score.metric}" if negate else score.metric
    result = -score.value if negate else score.value
    entry.result_metrics.append(metric)
    entry.result.append(result)

for metric in metadata.metrics or []:
for metric in metadata.evaluation_metrics:
sc = do_score(metric)
entry[metric] = sc.value
if metric == entry.metric:
if metric in entry.optimization_metrics:
set_score(sc)

if 'result' not in entry:
set_score(do_score(entry.metric))

entry.result = tuple(entry.result)
entry.result_metrics = tuple(entry.result_metrics)
entry.evaluation_metrics = tuple(entry.evaluation_metrics)
entry.optimization_metrics = tuple(entry.optimization_metrics)
entry.metric = entry.optimization_metrics
entry.info = result.info
if scoring_errors:
entry.info = "; ".join(filter(lambda it: it, [entry.info, *scoring_errors]))
Expand Down