From 6ce750518959a89d8dd1a095865937f7f777f4bf Mon Sep 17 00:00:00 2001 From: YuanHe Date: Fri, 19 Jun 2026 22:54:21 -0700 Subject: [PATCH] feat(examples/strands_sglang): update to strands-sglang 0.4.2 Migrate the example to the strands-sglang 0.4.2 API: - Read the token trajectory via the new `model.rollout` (`Rollout`) API instead of the removed `model.token_manager` (`initial_prompt_length`, `token_ids`/`loss_mask`/`logprobs`). - Use a self-contained `subprocess_interpreter.py` for the `execute_python_code` tool, keeping the example dependency-light. - Drop the one-line `requirements.txt` (the only extra dep is `strands-sglang`, installed directly) and refresh the README accordingly. --- examples/strands_sglang/README.md | 8 +- examples/strands_sglang/__init__.py | 0 .../strands_sglang/generate_with_strands.py | 19 +- examples/strands_sglang/requirements.txt | 2 - .../strands_sglang/subprocess_interpreter.py | 480 ++++++++++++++++++ 5 files changed, 492 insertions(+), 17 deletions(-) create mode 100644 examples/strands_sglang/__init__.py delete mode 100644 examples/strands_sglang/requirements.txt create mode 100644 examples/strands_sglang/subprocess_interpreter.py diff --git a/examples/strands_sglang/README.md b/examples/strands_sglang/README.md index 1d5864a47d..f6978fa8c6 100644 --- a/examples/strands_sglang/README.md +++ b/examples/strands_sglang/README.md @@ -13,7 +13,7 @@ This example connects `slime` with [`strands-sglang`](https://github.com/horizon `strands-sglang` bridges the gap by extending `strands` with SGLang's native `/generate` endpoint: - Captures exact token IDs during generation (no retokenization drift) -- Automatically tracks `loss_mask` via `token_manager` +- Automatically tracks `loss_mask` via the `Rollout` tracker (`model.rollout`) - Provides `ToolLimiter` for clean trajectory truncation ## Install Dependencies @@ -22,11 +22,9 @@ This example connects `slime` with [`strands-sglang`](https://github.com/horizon 2. Go to slime folder: `cd /root/slime` 3. Install slime: `pip install -e . --no-deps` 4. Go to the example folder: `cd /root/slime/examples/strands_sglang` -5. Install other dependencies: `pip install -r requirements.txt` +5. Install `strands-sglang`: `pip install strands-sglang==0.4.2` -> NOTE: `strands-sglang` is under rapid development, so we recommend using the GitHub repo version: `strands-sglang @ git+https://github.com/horizon-rl/strands-sglang.git` - -> NOTE: We use camel-ai's subprocess code interpreter for python code execution, which is NOT a good practice; it's just for convenience of this example. +> NOTE: The `execute_python_code` tool runs code via `subprocess_interpreter.py`, a self-contained interpreter vendored from camel-ai so this example does not depend on the full `camel-ai` package. It runs model-generated code in a local subprocess with **no isolation**, which is NOT a good practice; it is here only for the convenience of this example. Use a sandboxed interpreter (Docker, e2b, microsandbox, ...) for anything beyond local experimentation. ## Prepare Model diff --git a/examples/strands_sglang/__init__.py b/examples/strands_sglang/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/strands_sglang/generate_with_strands.py b/examples/strands_sglang/generate_with_strands.py index 42484e1298..d62ee79780 100644 --- a/examples/strands_sglang/generate_with_strands.py +++ b/examples/strands_sglang/generate_with_strands.py @@ -1,7 +1,5 @@ -# Updated with strands-sglang 0.3.2 import logging -from camel.interpreters import SubprocessInterpreter from strands import Agent, tool from strands_sglang import SGLangModel, ToolLimiter, get_client_from_slime_args from strands_sglang.tool_parsers import HermesToolParser @@ -10,6 +8,8 @@ from slime.rollout.sglang_rollout import GenerateState from slime.utils.types import Sample +from .subprocess_interpreter import SubprocessInterpreter + logger = logging.getLogger(__name__) SYSTEM_PROMPT = """ @@ -22,7 +22,6 @@ """.strip() MAX_TOOL_ITERS = 5 -MAX_TOOL_CALLS = None # No limit @tool @@ -51,7 +50,7 @@ async def generate(args, sample: Sample, sampling_params) -> Sample: sampling_params=sampling_params, ) - tool_limiter = ToolLimiter(max_tool_iters=MAX_TOOL_ITERS, max_tool_calls=MAX_TOOL_CALLS) + tool_limiter = ToolLimiter(max_tool_iters=MAX_TOOL_ITERS) agent = Agent( model=model, tools=[execute_python_code], @@ -71,12 +70,12 @@ async def generate(args, sample: Sample, sampling_params) -> Sample: sample.status = Sample.Status.TRUNCATED logger.warning(f"TRUNCATED: {type(e).__name__}: {e}") - # Extract token trajectory from token_manager - tm = model.token_manager - prompt_len = len(tm.segments[0]) # system + user are first segment - sample.tokens = tm.token_ids - sample.loss_mask = tm.loss_mask[prompt_len:] - sample.rollout_log_probs = tm.logprobs[prompt_len:] + # Extract token trajectory from the rollout tracker + rollout = model.rollout + prompt_len = rollout.initial_prompt_length # system + user are the first segment + sample.tokens = rollout.token_ids + sample.loss_mask = rollout.loss_mask[prompt_len:] + sample.rollout_log_probs = rollout.logprobs[prompt_len:] sample.response_length = len(sample.tokens) - prompt_len sample.response = model.tokenizer.decode(sample.tokens[prompt_len:], skip_special_tokens=False) # Tool iteration and tool call count are different because multiple parallel tool calls count as 1 iteration diff --git a/examples/strands_sglang/requirements.txt b/examples/strands_sglang/requirements.txt deleted file mode 100644 index f933733284..0000000000 --- a/examples/strands_sglang/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -camel-ai -strands-sglang diff --git a/examples/strands_sglang/subprocess_interpreter.py b/examples/strands_sglang/subprocess_interpreter.py new file mode 100644 index 0000000000..b6e0493767 --- /dev/null +++ b/examples/strands_sglang/subprocess_interpreter.py @@ -0,0 +1,480 @@ +# Copyright 2025-2026 Strands RL Contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Vendored from CAMEL-AI (https://github.com/camel-ai/camel, +# camel/interpreters/subprocess_interpreter.py), Apache License 2.0, +# Copyright 2023-2026 @ CAMEL-AI.org. Copied here so this example does not +# depend on the full `camel-ai` package. Deviations from the original: +# - `InterpreterError` and `BaseInterpreter` are inlined (no `camel` imports) +# - uses the stdlib `logging` module instead of `camel.logger` +# - uses stdlib `ast.unparse()` instead of the third-party `astor` package +# - dropped the cosmetic `colorama` coloring of printed output + +import ast +import logging +import os +import subprocess +import sys +import tempfile +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, ClassVar + +logger = logging.getLogger(__name__) + + +class InterpreterError(Exception): + r"""Exception raised for errors that can be solved by regenerating code.""" + + +class BaseInterpreter(ABC): + r"""An abstract base class for code interpreters.""" + + @abstractmethod + def run(self, code: str, code_type: str) -> str: + r"""Executes the given code based on its type.""" + + @abstractmethod + def supported_code_types(self) -> list[str]: + r"""Provides supported code types by the interpreter.""" + + @abstractmethod + def update_action_space(self, action_space: dict[str, Any]) -> None: + r"""Updates action space for *python* interpreter.""" + + @abstractmethod + def execute_command(self, command: str) -> str | tuple[str, str]: + r"""Executes a command in the interpreter.""" + + +class SubprocessInterpreter(BaseInterpreter): + r"""SubprocessInterpreter is a class for executing code files or code + strings in a subprocess. + + This class handles the execution of code in different scripting languages + (currently Python and Bash) within a subprocess, capturing their + stdout and stderr streams, and allowing user checking before executing code + strings or shell commands. + + Args: + require_confirm (bool, optional): If True, prompt user before running + code strings or shell commands for security. + (default: :obj:`True`) + print_stdout (bool, optional): If True, print the standard output of + the executed code. (default: :obj:`False`) + print_stderr (bool, optional): If True, print the standard error of the + executed code. (default: :obj:`True`) + execution_timeout (int, optional): Maximum time in seconds to wait for + code execution to complete. (default: :obj:`60`) + """ + + _CODE_EXECUTE_CMD_MAPPING: ClassVar[dict[str, dict[str, str]]] = { + "python": {"posix": "python {file_name}", "nt": "python {file_name}"}, + "bash": {"posix": "bash {file_name}", "nt": "bash {file_name}"}, + "r": {"posix": "Rscript {file_name}", "nt": "Rscript {file_name}"}, + } + + _CODE_EXTENSION_MAPPING: ClassVar[dict[str, str]] = { + "python": "py", + "bash": "sh", + "r": "R", + } + + _CODE_TYPE_MAPPING: ClassVar[dict[str, str]] = { + "python": "python", + "py3": "python", + "python3": "python", + "py": "python", + "shell": "bash", + "bash": "bash", + "sh": "bash", + "r": "r", + "R": "r", + } + + def __init__( + self, + require_confirm: bool = True, + print_stdout: bool = False, + print_stderr: bool = True, + execution_timeout: int = 60, + ) -> None: + self.require_confirm = require_confirm + self.print_stdout = print_stdout + self.print_stderr = print_stderr + self.execution_timeout = execution_timeout + + def run_file( + self, + file: Path, + code_type: str = "python", + ) -> str: + r"""Executes a code file in a subprocess and captures its output. + + Args: + file (Path): The path object of the file to run. + code_type (str): The type of code to execute (e.g., 'python', + 'bash'). (default: obj:`python`) + + Returns: + str: A string containing the captured stdout and stderr of the + executed code. + """ + if not file.is_file(): + return f"{file} is not a file." + code_type = self._check_code_type(code_type) + temp_file = None + if self._CODE_TYPE_MAPPING[code_type] == "python": + # For Python code, use ast to analyze and modify the code + with open(file, encoding="utf-8") as f: + source = f.read() + + # Parse the source code + try: + tree = ast.parse(source) + # Get the last node + if tree.body: + last_node = tree.body[-1] + # Handle expressions that would normally not produce output + # For example: In a REPL, typing '1 + 2' should show '3' + + if isinstance(last_node, ast.Expr): + # Only wrap in print(repr()) if it's not already a + # print call + if not ( + isinstance(last_node.value, ast.Call) + and isinstance(last_node.value.func, ast.Name) + and last_node.value.func.id == "print" + ): + # Transform the AST to wrap the expression in print + # (repr()) + # Example transformation: + # Before: x + y + # After: print(repr(x + y)) + tree.body[-1] = ast.Expr( + value=ast.Call( + # Create print() function call + func=ast.Name(id="print", ctx=ast.Load()), + args=[ + ast.Call( + # Create repr() function call + func=ast.Name(id="repr", ctx=ast.Load()), + # Pass the original expression as + # argument to repr() + args=[last_node.value], + keywords=[], + ) + ], + keywords=[], + ) + ) + # Fix missing source locations + ast.fix_missing_locations(tree) + # Convert back to source (stdlib ast.unparse, Python 3.9+) + modified_source = ast.unparse(tree) + # Create a temporary file with the modified source + temp_file = self._create_temp_file(modified_source, "py") + cmd = ["python", str(temp_file)] + except (SyntaxError, TypeError, ValueError) as e: + logger.warning("Failed to parse Python code with AST: %s", e) + platform_type = "posix" if os.name != "nt" else "nt" + cmd_template = self._CODE_EXECUTE_CMD_MAPPING[code_type][platform_type] + base_cmd = cmd_template.split()[0] + + # Check if command is available + if not self._is_command_available(base_cmd): + raise InterpreterError( + f"Command '{base_cmd}' not found. Please ensure it is installed and available in your PATH." + ) from None + + cmd = [base_cmd, str(file)] + else: + # For non-Python code, use standard execution + platform_type = "posix" if os.name != "nt" else "nt" + cmd_template = self._CODE_EXECUTE_CMD_MAPPING[code_type][platform_type] + base_cmd = cmd_template.split()[0] # Get 'python', 'bash', etc. + + # Check if command is available + if not self._is_command_available(base_cmd): + raise InterpreterError( + f"Command '{base_cmd}' not found. Please ensure it is installed and available in your PATH." + ) + + cmd = [base_cmd, str(file)] + + # Get current Python executable's environment + env = os.environ.copy() + + # On Windows, ensure we use the correct Python executable path + if os.name == "nt": + python_path = os.path.dirname(sys.executable) + if "PATH" in env: + env["PATH"] = python_path + os.pathsep + env["PATH"] + else: + env["PATH"] = python_path + + try: + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + env=env, + shell=False, # Never use shell=True for security + ) + # Add timeout to prevent hanging processes + stdout, stderr = proc.communicate(timeout=self.execution_timeout) + return_code = proc.returncode + except subprocess.TimeoutExpired: + proc.kill() + stdout, stderr = proc.communicate() + return_code = proc.returncode + timeout_msg = f"Process timed out after {self.execution_timeout} seconds and was terminated." + stderr = f"{stderr}\n{timeout_msg}" + + # Clean up temporary file if it was created + if temp_file is not None: + try: + if temp_file.exists(): + try: + temp_file.unlink() + except PermissionError: + # On Windows, files might be locked + logger.warning("Could not delete temp file %s (may be locked)", temp_file) + except Exception as e: + logger.warning("Failed to cleanup temporary file: %s", e) + + if self.print_stdout and stdout: + print("======stdout======") + print(stdout) + print("==================") + if self.print_stderr and stderr: + print("======stderr======") + print(stderr) + print("==================") + + # Build the execution result + exec_result = "" + if stdout: + exec_result += stdout + if stderr: + exec_result += f"(stderr: {stderr})" + if return_code != 0: + error_msg = f"(Execution failed with return code {return_code})" + if not stderr: + exec_result += error_msg + elif error_msg not in stderr: + exec_result += error_msg + return exec_result + + def run( + self, + code: str, + code_type: str, + ) -> str: + r"""Generates a temporary file with the given code, executes it, and + deletes the file afterward. + + Args: + code (str): The code string to execute. + code_type (str): The type of code to execute (e.g., 'python', + 'bash'). + + Returns: + str: A string containing the captured stdout and stderr of the + executed code. + + Raises: + InterpreterError: If the user declines to run the code or if the + code type is unsupported. + """ + code_type = self._check_code_type(code_type) + + if self.require_confirm: + self._confirm_execution( + message=(f"The following {code_type} code will run on your computer: {code}"), + prompt="Running code? [y/N]:", + declined_message=( + "Execution halted: User opted not to run the code. " + "This choice stops the current operation and any " + "further code execution." + ), + ) + + temp_file_path = None + temp_dir = None + try: + temp_file_path = self._create_temp_file(code=code, extension=self._CODE_EXTENSION_MAPPING[code_type]) + temp_dir = temp_file_path.parent + return self.run_file(temp_file_path, code_type) + finally: + # Clean up temp file and directory + try: + if temp_file_path and temp_file_path.exists(): + try: + temp_file_path.unlink() + except PermissionError: + # On Windows, files might be locked + logger.warning("Could not delete temp file %s", temp_file_path) + + if temp_dir and temp_dir.exists(): + try: + import shutil + + shutil.rmtree(temp_dir, ignore_errors=True) + except Exception as e: + logger.warning("Could not delete temp directory: %s", e) + except Exception as e: + logger.warning("Error during cleanup: %s", e) + + def _create_temp_file(self, code: str, extension: str) -> Path: + r"""Creates a temporary file with the given code and extension. + + Args: + code (str): The code to write to the temporary file. + extension (str): The file extension to use. + + Returns: + Path: The path to the created temporary file. + """ + try: + # Create a temporary directory first to ensure we have write + # permissions + temp_dir = tempfile.mkdtemp() + # Create file path with appropriate extension + file_path = Path(temp_dir) / f"temp_code.{extension}" + + # Write code to file with appropriate encoding + with open(file_path, "w", encoding="utf-8") as f: + f.write(code) + + return file_path + except Exception as e: + # Clean up temp directory if creation failed + if "temp_dir" in locals(): + try: + import shutil + + shutil.rmtree(temp_dir, ignore_errors=True) + except Exception: + pass + logger.error("Failed to create temporary file: %s", e) + raise + + def _check_code_type(self, code_type: str) -> str: + if code_type not in self._CODE_TYPE_MAPPING: + raise InterpreterError( + f"Unsupported code type {code_type}. Currently " + f"`{self.__class__.__name__}` only supports " + f"{', '.join(self._CODE_EXTENSION_MAPPING.keys())}." + ) + return self._CODE_TYPE_MAPPING[code_type] + + def _confirm_execution(self, message: str, prompt: str, declined_message: str) -> None: + r"""Prompt the user before executing local subprocess work.""" + logger.info(message) + while True: + choice = input(prompt).lower().strip() + if choice in ["y", "yes", "ye"]: + return + if choice in ["no", "n", ""]: + raise InterpreterError(declined_message) + print("Please enter 'y' or 'n'.") + + def supported_code_types(self) -> list[str]: + r"""Provides supported code types by the interpreter.""" + return list(self._CODE_EXTENSION_MAPPING.keys()) + + def update_action_space(self, action_space: dict[str, Any]) -> None: + r"""Updates action space for *python* interpreter.""" + raise RuntimeError("SubprocessInterpreter doesn't support `action_space`.") + + def _is_command_available(self, command: str) -> bool: + r"""Check if a command is available in the system PATH. + + Args: + command (str): The command to check. + + Returns: + bool: True if the command is available, False otherwise. + """ + if os.name == "nt": # Windows + # On Windows, use where.exe to find the command + try: + with open(os.devnull, "w") as devnull: + subprocess.check_call( + ["where", command], + stdout=devnull, + stderr=devnull, + shell=False, + ) + return True + except subprocess.CalledProcessError: + return False + else: # Unix-like systems + # On Unix-like systems, use which to find the command + try: + with open(os.devnull, "w") as devnull: + subprocess.check_call( + ["which", command], + stdout=devnull, + stderr=devnull, + shell=False, + ) + return True + except subprocess.CalledProcessError: + return False + + def execute_command(self, command: str) -> tuple[str, str]: + r"""Executes a shell command in a subprocess and captures its output. + + Args: + command (str): The shell command to execute. + + Returns: + tuple: A tuple containing the captured stdout and stderr of the + executed command. + + Raises: + InterpreterError: If the command execution fails. + """ + if self.require_confirm: + self._confirm_execution( + message=(f"The following shell command will run on your computer: {command}"), + prompt="Running command? [y/N]:", + declined_message=( + "Execution halted: User opted not to run the " + "command. This choice stops the current operation " + "and any further command execution." + ), + ) + + try: + # Get current Python executable's environment + env = os.environ.copy() + + proc = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + env=env, + shell=True, # Use shell=True for command execution + ) + # Add timeout to prevent hanging processes + stdout, stderr = proc.communicate(timeout=self.execution_timeout) + + return stdout, stderr + except Exception as e: + raise InterpreterError(f"Error executing command: {e}") from e