From 1d63a853bb7d639fc4cab0a658d935b0f88136ca Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Mon, 22 Jun 2026 18:13:56 -0700 Subject: [PATCH] fix(retool): coerce list prompt to str in reward_func With --apply-chat-template, sample.prompt is a list[dict], so sample.prompt + sample.response raised TypeError. Coerce to str first, matching slime's own str(sample.prompt) idiom in sglang_rollout. Fixes #1829 --- examples/retool/generate_with_retool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/retool/generate_with_retool.py b/examples/retool/generate_with_retool.py index ebb5c4f1a..e0bb33d64 100644 --- a/examples/retool/generate_with_retool.py +++ b/examples/retool/generate_with_retool.py @@ -410,7 +410,8 @@ async def reward_func(args, sample, **kwargs): raise TypeError("Sample must be an instance of Sample class.") # Build complete solution string - solution_str = sample.prompt + sample.response + prompt_str = sample.prompt if isinstance(sample.prompt, str) else str(sample.prompt) + solution_str = prompt_str + sample.response # Get ground truth answer - label is a string, not a dict ground_truth = sample.label if sample.label is not None else ""