From 45281ae881c224f96aeb7bc904cfa30d3cd064a6 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 1 Jun 2026 22:59:43 -0400 Subject: [PATCH 01/13] Use in-memory event history for condenser replay Co-authored-by: openhands --- agents/openhands_sdk/condensation_sft.py | 57 ++++++++++++++++++++---- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/agents/openhands_sdk/condensation_sft.py b/agents/openhands_sdk/condensation_sft.py index d85a9094..dc14d5e8 100644 --- a/agents/openhands_sdk/condensation_sft.py +++ b/agents/openhands_sdk/condensation_sft.py @@ -15,6 +15,8 @@ from openhands.sdk.context.condenser import LLMSummarizingCondenser from openhands.sdk.context.condenser.utils import get_total_token_count from openhands.sdk.context.view import View +from openhands.sdk.event import LLMConvertibleEvent as SDKEvent +from openhands.sdk.event import MessageEvent, SystemPromptEvent from openhands.sdk.event.condenser import Condensation from openhands.sdk.llm.llm_response import LLMResponse from openhands.sdk.tool import ToolDefinition @@ -98,6 +100,21 @@ def format_messages(llm: LLM, messages: list[Message]) -> list[dict[str, Any]]: return normalize_message_content(llm.format_messages_for_llm(messages)) +class TrackingSDKEventBuilder(SDKEventBuilder): + def __init__( + self, + conversation: Conversation, + metadata: Any, + event_history: list[SDKEvent], + ) -> None: + super().__init__(conversation, metadata) + self.event_history = event_history + + def append(self, event: SDKEvent) -> None: + self.event_history.append(event) + super().append(event) + + def token_count(view: View, llm: LLM) -> int: return get_total_token_count(view.events, llm) @@ -168,7 +185,7 @@ def make_trajectory_record_from_conversation( def condensation_prompt_record_if_needed( *, - conversation: Conversation, + events: list[SDKEvent], condenser: LLMSummarizingCondenser, agent_llm: LLM, condenser_llm: PromptCapturingLLM, @@ -177,7 +194,7 @@ def condensation_prompt_record_if_needed( max_tokens: int, condensation_index: int, ) -> tuple[Condensation, dict[str, Any]] | None: - view = View.from_events(conversation.state.events) + view = View.from_events(events) prompt_token_count = token_count(view, condenser.llm) before_prompt_count = len(condenser_llm.captured_messages) condensation_result = condenser.condense(view, agent_llm=agent_llm) @@ -213,7 +230,28 @@ def append_standardized_events_with_condensation( include_trajectories: bool, ) -> list[dict[str, Any]]: metadata = load_dataset_metadata(dataset_name, required=True) - builder = SDKEventBuilder(conversation, metadata) + event_history: list[SDKEvent] = [ + SystemPromptEvent( + system_prompt=TextContent(text=conversation.agent.static_system_message), + tools=list(conversation.agent.tools_map.values()), + ) + ] + builder = TrackingSDKEventBuilder(conversation, metadata, event_history) + first_event = trajectory.content[0] + if not isinstance(first_event, TextObservation) or first_event.source != "user": + raise ValueError( + "OpenHands SDK condensation conversion expects the first event to be a " + "user TextObservation" + ) + builder.append( + MessageEvent( + source="user", + llm_message=Message( + role="user", + content=[TextContent(text=first_event.content)], + ), + ) + ) condenser_llm = PromptCapturingLLM( usage_id="openhands-sdk-condensation-sft-condenser", model=model, @@ -231,18 +269,18 @@ def append_standardized_events_with_condensation( condensation_index = 1 index = start_index batch_number = 0 - last_safe_events = list(conversation.state.events) + last_safe_events = list(event_history) def update_last_safe_events() -> None: nonlocal last_safe_events - view = View.from_events(conversation.state.events) + view = View.from_events(event_history) if token_count(view, conversation.agent.llm) <= max_tokens: - last_safe_events = list(conversation.state.events) + last_safe_events = list(event_history) def emit_condensation_boundary_if_needed() -> None: nonlocal segment_index, condensation_index, last_safe_events result = condensation_prompt_record_if_needed( - conversation=conversation, + events=event_history, condenser=condenser, agent_llm=conversation.agent.llm, condenser_llm=condenser_llm, @@ -266,8 +304,9 @@ def emit_condensation_boundary_if_needed() -> None: ) segment_index += 1 records.append(prompt_record) + event_history.append(condensation) conversation.state.events.append(condensation) - last_safe_events = list(conversation.state.events) + last_safe_events = list(event_history) condensation_index += 1 while index < len(trajectory.content): @@ -342,7 +381,7 @@ def process_row( with tempfile.TemporaryDirectory(prefix="openhands-sdk-condensation-sft-") as tmpdir: conversation = Conversation(agent=agent, workspace=tmpdir, visualizer=None) try: - conversation.send_message(first_event.content) + conversation._ensure_agent_ready() return append_standardized_events_with_condensation( conversation=conversation, trajectory=trajectory, From 1c1b7d1b1eae712c2e2e5e2d633ee1e969c8a4c6 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 1 Jun 2026 23:21:38 -0400 Subject: [PATCH 02/13] Add concurrent condenser SFT generation Co-authored-by: openhands --- agents/openhands_sdk/condensation_sft.py | 93 ++++++++++++++++++++---- 1 file changed, 78 insertions(+), 15 deletions(-) diff --git a/agents/openhands_sdk/condensation_sft.py b/agents/openhands_sdk/condensation_sft.py index dc14d5e8..80b44774 100644 --- a/agents/openhands_sdk/condensation_sft.py +++ b/agents/openhands_sdk/condensation_sft.py @@ -1,11 +1,12 @@ from __future__ import annotations import argparse +import asyncio import json import os import sys import tempfile -from collections.abc import Sequence +from collections.abc import Iterator, Sequence from typing import Any os.environ.setdefault("OPENHANDS_SUPPRESS_BANNER", "1") @@ -397,6 +398,60 @@ def process_row( conversation.close() +def iter_input_chunks(chunk_size: int) -> Iterator[list[str]]: + chunk: list[str] = [] + for line in sys.stdin: + line = line.strip() + if not line: + continue + chunk.append(line) + if len(chunk) >= chunk_size: + yield chunk + chunk = [] + if chunk: + yield chunk + + +async def process_line( + line: str, + *, + args: argparse.Namespace, + semaphore: asyncio.Semaphore, +) -> list[dict[str, Any]]: + async with semaphore: + return await asyncio.to_thread( + process_row, + line, + max_tokens=args.max_tokens, + model=args.model, + include_trajectories=args.include_trajectories == "yes", + max_size=args.max_size, + keep_first=args.keep_first, + ) + + +async def process_stream(args: argparse.Namespace) -> None: + from tqdm import tqdm + + semaphore = asyncio.Semaphore(args.concurrency) + progress = tqdm( + desc="condensation_sft", + unit="row", + dynamic_ncols=True, + disable=args.no_progress, + ) + try: + for chunk in iter_input_chunks(args.chunk_size): + tasks = [process_line(line, args=args, semaphore=semaphore) for line in chunk] + chunk_records = await asyncio.gather(*tasks) + for records in chunk_records: + for record in records: + print(json.dumps(record, ensure_ascii=False), flush=False) + progress.update(len(chunk)) + finally: + progress.close() + + def main() -> None: parser = argparse.ArgumentParser( description=( @@ -414,21 +469,29 @@ def main() -> None: default="yes", help="Whether to emit the original OpenHands SDK trajectory record before summaries.", ) + parser.add_argument( + "--concurrency", + type=int, + default=1, + help="Number of input trajectories to process concurrently.", + ) + parser.add_argument( + "--chunk-size", + type=int, + default=100, + help="Number of input rows to schedule per async batch.", + ) + parser.add_argument( + "--no-progress", + action="store_true", + help="Disable tqdm progress output on stderr.", + ) args = parser.parse_args() - for line in sys.stdin: - line = line.strip() - if not line: - continue - records = process_row( - line, - max_tokens=args.max_tokens, - model=args.model, - include_trajectories=args.include_trajectories == "yes", - max_size=args.max_size, - keep_first=args.keep_first, - ) - for record in records: - print(json.dumps(record, ensure_ascii=False)) + if args.concurrency < 1: + raise ValueError("--concurrency must be at least 1") + if args.chunk_size < 1: + raise ValueError("--chunk-size must be at least 1") + asyncio.run(process_stream(args)) if __name__ == "__main__": From 1b3d43f3a4a80052157d47e831a0aae017c9f2bd Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 1 Jun 2026 23:44:59 -0400 Subject: [PATCH 03/13] Stream concurrent condenser outputs as rows complete Co-authored-by: openhands --- agents/openhands_sdk/condensation_sft.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/agents/openhands_sdk/condensation_sft.py b/agents/openhands_sdk/condensation_sft.py index 80b44774..d1ab46ea 100644 --- a/agents/openhands_sdk/condensation_sft.py +++ b/agents/openhands_sdk/condensation_sft.py @@ -442,12 +442,15 @@ async def process_stream(args: argparse.Namespace) -> None: ) try: for chunk in iter_input_chunks(args.chunk_size): - tasks = [process_line(line, args=args, semaphore=semaphore) for line in chunk] - chunk_records = await asyncio.gather(*tasks) - for records in chunk_records: + tasks = [ + asyncio.create_task(process_line(line, args=args, semaphore=semaphore)) + for line in chunk + ] + for task in asyncio.as_completed(tasks): + records = await task for record in records: - print(json.dumps(record, ensure_ascii=False), flush=False) - progress.update(len(chunk)) + print(json.dumps(record, ensure_ascii=False), flush=True) + progress.update(1) finally: progress.close() From b483744ea542c49ed9092e273669ab5e520eb875 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 2 Jun 2026 00:14:38 -0400 Subject: [PATCH 04/13] Continue condenser generation after row errors Co-authored-by: openhands --- agents/openhands_sdk/condensation_sft.py | 45 +++++++++++++++++++----- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/agents/openhands_sdk/condensation_sft.py b/agents/openhands_sdk/condensation_sft.py index d1ab46ea..ad0cfc79 100644 --- a/agents/openhands_sdk/condensation_sft.py +++ b/agents/openhands_sdk/condensation_sft.py @@ -418,16 +418,38 @@ async def process_line( args: argparse.Namespace, semaphore: asyncio.Semaphore, ) -> list[dict[str, Any]]: - async with semaphore: - return await asyncio.to_thread( - process_row, - line, - max_tokens=args.max_tokens, - model=args.model, - include_trajectories=args.include_trajectories == "yes", - max_size=args.max_size, - keep_first=args.keep_first, + try: + async with semaphore: + return await asyncio.to_thread( + process_row, + line, + max_tokens=args.max_tokens, + model=args.model, + include_trajectories=args.include_trajectories == "yes", + max_size=args.max_size, + keep_first=args.keep_first, + ) + except Exception as exc: + if not args.continue_on_error: + raise + row_id = None + try: + row_id = json.loads(line).get("id") + except Exception: + pass + print( + json.dumps( + { + "id": row_id, + "error_type": type(exc).__name__, + "error": str(exc), + }, + ensure_ascii=False, + ), + file=sys.stderr, + flush=True, ) + return [] async def process_stream(args: argparse.Namespace) -> None: @@ -489,6 +511,11 @@ def main() -> None: action="store_true", help="Disable tqdm progress output on stderr.", ) + parser.add_argument( + "--continue-on-error", + action="store_true", + help="Log per-row conversion errors to stderr and continue processing remaining rows.", + ) args = parser.parse_args() if args.concurrency < 1: raise ValueError("--concurrency must be at least 1") From ea2cbfbaed91e26a1ba02ef876a2676800874c38 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 2 Jun 2026 01:07:12 -0400 Subject: [PATCH 05/13] Count condensed views when selecting safe trajectory snapshots Co-authored-by: openhands --- agents/openhands_sdk/condensation_sft.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/agents/openhands_sdk/condensation_sft.py b/agents/openhands_sdk/condensation_sft.py index ad0cfc79..54803406 100644 --- a/agents/openhands_sdk/condensation_sft.py +++ b/agents/openhands_sdk/condensation_sft.py @@ -120,6 +120,12 @@ def token_count(view: View, llm: LLM) -> int: return get_total_token_count(view.events, llm) +def formatted_token_count(events: Sequence[SDKEvent], llm: LLM) -> int: + view = View.from_events(events) + messages = LLMConvertibleEvent.events_to_messages(view.events) + return llm.get_token_count(messages) + + def make_condensation_prompt_record( *, trajectory_id: str, @@ -274,8 +280,7 @@ def append_standardized_events_with_condensation( def update_last_safe_events() -> None: nonlocal last_safe_events - view = View.from_events(event_history) - if token_count(view, conversation.agent.llm) <= max_tokens: + if formatted_token_count(event_history, conversation.agent.llm) <= max_tokens: last_safe_events = list(event_history) def emit_condensation_boundary_if_needed() -> None: From 473a1cb549a653a3bf9f3c897a46ccc672f6b7d2 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 2 Jun 2026 07:00:35 -0400 Subject: [PATCH 06/13] Truncate dataset tool observations for SDK SFT conversion Co-authored-by: openhands --- agents/openhands_sdk/std_to_sft.py | 8 +++++++- tests/test_openhands_sdk_sft_conversion.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/agents/openhands_sdk/std_to_sft.py b/agents/openhands_sdk/std_to_sft.py index 0438b3d9..5817476d 100644 --- a/agents/openhands_sdk/std_to_sft.py +++ b/agents/openhands_sdk/std_to_sft.py @@ -33,6 +33,7 @@ from openhands.tools.file_editor import FileEditorTool from openhands.tools.task_tracker import TaskTrackerTool from openhands.tools.terminal import TerminalTool +from openhands.tools.terminal.definition import MAX_CMD_OUTPUT_SIZE, maybe_truncate from pydantic import SecretStr from schema.action.api import ApiAction @@ -86,7 +87,12 @@ class DatasetToolObservation(SDKObservation): @property def to_llm_content(self) -> Sequence[TextContent | ImageContent]: - return [TextContent(text=self.output)] + output = maybe_truncate( + content=self.output, + truncate_after=MAX_CMD_OUTPUT_SIZE, + tool_prefix="dataset_tool", + ) + return [TextContent(text=output)] class DatasetToolExecutor(ToolExecutor): diff --git a/tests/test_openhands_sdk_sft_conversion.py b/tests/test_openhands_sdk_sft_conversion.py index 6d497a2f..54611133 100644 --- a/tests/test_openhands_sdk_sft_conversion.py +++ b/tests/test_openhands_sdk_sft_conversion.py @@ -437,3 +437,15 @@ def test_openhands_sdk_converter_rejects_mysql_code_action(): with pytest.raises(ValueError, match="mysql"): std_to_sft.map_code_action(action) + + +def test_openhands_sdk_dataset_tool_observation_truncates_large_outputs(): + from openhands.tools.terminal.definition import MAX_CMD_OUTPUT_SIZE + + from agents.openhands_sdk.std_to_sft import DatasetToolObservation + + observation = DatasetToolObservation(output="A" * (MAX_CMD_OUTPUT_SIZE + 1000)) + [content] = observation.to_llm_content + + assert len(content.text) == MAX_CMD_OUTPUT_SIZE + assert "response clipped" in content.text From df710f9db3749167d913972d5d63a3c556f3e3cb Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 2 Jun 2026 07:35:54 -0400 Subject: [PATCH 07/13] Fix GAIR daVinci JSONL extraction Co-authored-by: openhands --- datasets/gair_davinci_dev/extract_raw.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datasets/gair_davinci_dev/extract_raw.py b/datasets/gair_davinci_dev/extract_raw.py index 3c60b89a..8f7dedbb 100644 --- a/datasets/gair_davinci_dev/extract_raw.py +++ b/datasets/gair_davinci_dev/extract_raw.py @@ -5,15 +5,15 @@ from datasets import load_dataset DATASET_NAME = "GAIR/daVinci-Dev" -CONFIG_NAME = "env_native" +DATA_FILE = "hf://datasets/GAIR/daVinci-Dev/env-native.jsonl" SPLIT = "train" def main(): token = os.getenv("HF_TOKEN") or None dataset = load_dataset( - DATASET_NAME, - CONFIG_NAME, + "json", + data_files=DATA_FILE, split=SPLIT, streaming=True, token=token, @@ -27,7 +27,7 @@ def main(): main() except Exception as exc: print( - f"Failed to stream {DATASET_NAME}/{CONFIG_NAME}. The dataset is gated; " + f"Failed to stream {DATA_FILE}. The dataset is gated; " "authenticate with Hugging Face and ensure access has been granted.", file=sys.stderr, ) From 2ec045a8bf4df3a51e115edcc58e1adc1a9419ae Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Tue, 2 Jun 2026 10:48:56 -0400 Subject: [PATCH 08/13] Require dataset metadata files Co-authored-by: openhands --- .github/workflows/check_api_docstrings.yml | 14 +- agents/openhands_v0/api.py | 123 +++--- agents/openhands_v0/convert_api_to_mcp.py | 20 +- agents/sweagent/api.py | 121 +++--- datasets/CharlieDreemur_OpenManus-RL/api.py | 125 ------ .../CharlieDreemur_OpenManus-RL/metadata.json | 273 +++++++++++++ datasets/SALT-NLP_SWE-chat/api.py | 50 --- datasets/SALT-NLP_SWE-chat/metadata.json | 32 ++ datasets/agenttuning_alfworld/api.py | 184 --------- datasets/agenttuning_db/metadata.json | 5 + datasets/agenttuning_kg/api.py | 78 ---- datasets/agenttuning_webshop/api.py | 25 -- datasets/allenai_Sera-4.6-Lite-T2/api.py | 14 - .../allenai_Sera-4.6-Lite-T2/metadata.json | 7 + datasets/android_in_the_wild/api.py | 45 --- datasets/android_in_the_wild/metadata.json | 37 ++ datasets/androidcontrol/api.py | 58 --- datasets/androidcontrol/metadata.json | 91 +++++ datasets/code_feedback/metadata.json | 7 + datasets/codeactinstruct/api.py | 119 ------ datasets/codeactinstruct/metadata.json | 249 ++++++++++++ datasets/coderforge_preview/api.py | 34 -- datasets/coderforge_preview/metadata.json | 7 + datasets/codescout/metadata.json | 7 + datasets/cognitivekernel_pro_sft/api.py | 51 --- .../cognitivekernel_pro_sft/metadata.json | 123 ++++++ datasets/dolci_instruct_sft_tool_use/api.py | 121 ------ .../dolci_instruct_sft_tool_use/metadata.json | 297 ++++++++++++++ datasets/eto/metadata.json | 5 + datasets/gair_davinci_dev/api.py | 26 -- datasets/gair_davinci_dev/metadata.json | 7 + datasets/go-browse-wa/api.py | 180 --------- datasets/go-browse-wa/metadata.json | 96 +++++ datasets/hybrid-gym/api.py | 32 -- datasets/hybrid-gym/metadata.json | 55 +++ datasets/jupyter-agent-dataset/metadata.json | 7 + .../metadata.json | 7 + datasets/llava_plus/api.py | 20 - datasets/llava_plus/metadata.json | 44 +++ datasets/logicstar_swe-star/api.py | 34 -- datasets/logicstar_swe-star/metadata.json | 65 ++++ datasets/mind2web/api.py | 44 --- datasets/mind2web/metadata.json | 90 +++++ datasets/mini-coder/metadata.json | 7 + datasets/miroverse_v0_1/api.py | 356 ----------------- datasets/miroverse_v0_1/metadata.json | 321 +++++++++++++++ datasets/nebius_SWE-agent-trajectories/api.py | 121 ------ .../api.py | 49 --- .../metadata.json | 80 ++++ .../nemotron_terminal_corpus/metadata.json | 7 + datasets/nnetnav-live/api.py | 149 ------- datasets/nnetnav-live/metadata.json | 97 +++++ datasets/nnetnav-wa/api.py | 149 ------- datasets/nnetnav-wa/metadata.json | 93 +++++ .../api.py | 34 -- .../metadata.json | 57 +++ datasets/omniact/metadata.json | 7 + datasets/openhands/api.py | 366 ------------------ datasets/openhands/metadata.json | 207 ++++++++++ datasets/openresearcher/api.py | 20 - datasets/openresearcher/metadata.json | 78 ++++ datasets/openthoughts_tb_dev/metadata.json | 7 + datasets/orca_agentinstruct/metadata.json | 5 + datasets/scale_swe_distilled/api.py | 38 -- datasets/scale_swe_distilled/metadata.json | 67 ++++ datasets/screenagent/metadata.json | 5 + .../api.py | 23 -- .../metadata.json | 7 + datasets/swe-play-trajectories/api.py | 66 ---- datasets/swe-play-trajectories/metadata.json | 62 +++ datasets/swe-smith/api.py | 31 -- datasets/swe-smith/metadata.json | 7 + datasets/synatra/api.py | 82 ---- datasets/synatra/metadata.json | 25 ++ datasets/toolmind/api.py | 33 -- datasets/toolmind/metadata.json | 100 +++++ datasets/toucan_1_5m/api.py | 165 -------- datasets/toucan_1_5m/metadata.json | 135 +++++++ datasets/turkingbench/api.py | 56 --- datasets/turkingbench/metadata.json | 44 +++ datasets/webarena_successful/api.py | 114 ------ datasets/webarena_successful/metadata.json | 25 ++ datasets/weblinx/api.py | 67 ---- datasets/weblinx/metadata.json | 113 ++++++ datasets/wonderbread/api.py | 47 --- datasets/wonderbread/metadata.json | 71 ++++ tests/test_dataset_structure.py | 17 + tests/test_standardized_schemas.py | 57 ++- 88 files changed, 3308 insertions(+), 3388 deletions(-) delete mode 100644 datasets/CharlieDreemur_OpenManus-RL/api.py create mode 100644 datasets/CharlieDreemur_OpenManus-RL/metadata.json delete mode 100644 datasets/SALT-NLP_SWE-chat/api.py create mode 100644 datasets/SALT-NLP_SWE-chat/metadata.json delete mode 100644 datasets/agenttuning_alfworld/api.py create mode 100644 datasets/agenttuning_db/metadata.json delete mode 100644 datasets/agenttuning_kg/api.py delete mode 100644 datasets/agenttuning_webshop/api.py delete mode 100644 datasets/allenai_Sera-4.6-Lite-T2/api.py create mode 100644 datasets/allenai_Sera-4.6-Lite-T2/metadata.json delete mode 100644 datasets/android_in_the_wild/api.py create mode 100644 datasets/android_in_the_wild/metadata.json delete mode 100644 datasets/androidcontrol/api.py create mode 100644 datasets/androidcontrol/metadata.json create mode 100644 datasets/code_feedback/metadata.json delete mode 100644 datasets/codeactinstruct/api.py create mode 100644 datasets/codeactinstruct/metadata.json delete mode 100644 datasets/coderforge_preview/api.py create mode 100644 datasets/coderforge_preview/metadata.json create mode 100644 datasets/codescout/metadata.json delete mode 100644 datasets/cognitivekernel_pro_sft/api.py create mode 100644 datasets/cognitivekernel_pro_sft/metadata.json delete mode 100644 datasets/dolci_instruct_sft_tool_use/api.py create mode 100644 datasets/dolci_instruct_sft_tool_use/metadata.json create mode 100644 datasets/eto/metadata.json delete mode 100644 datasets/gair_davinci_dev/api.py create mode 100644 datasets/gair_davinci_dev/metadata.json delete mode 100644 datasets/go-browse-wa/api.py create mode 100644 datasets/go-browse-wa/metadata.json delete mode 100644 datasets/hybrid-gym/api.py create mode 100644 datasets/hybrid-gym/metadata.json create mode 100644 datasets/jupyter-agent-dataset/metadata.json create mode 100644 datasets/kwai-klear_swe-smith-mini_swe_agent_plus-trajectories-66k/metadata.json delete mode 100644 datasets/llava_plus/api.py create mode 100644 datasets/llava_plus/metadata.json delete mode 100644 datasets/logicstar_swe-star/api.py create mode 100644 datasets/logicstar_swe-star/metadata.json delete mode 100644 datasets/mind2web/api.py create mode 100644 datasets/mind2web/metadata.json create mode 100644 datasets/mini-coder/metadata.json delete mode 100644 datasets/miroverse_v0_1/api.py create mode 100644 datasets/miroverse_v0_1/metadata.json delete mode 100644 datasets/nebius_SWE-agent-trajectories/api.py delete mode 100644 datasets/nebius_SWE-rebench-openhands-trajectories/api.py create mode 100644 datasets/nebius_SWE-rebench-openhands-trajectories/metadata.json create mode 100644 datasets/nemotron_terminal_corpus/metadata.json delete mode 100644 datasets/nnetnav-live/api.py create mode 100644 datasets/nnetnav-live/metadata.json delete mode 100644 datasets/nnetnav-wa/api.py create mode 100644 datasets/nnetnav-wa/metadata.json delete mode 100644 datasets/nvidia_SWE-Zero-openhands-trajectories/api.py create mode 100644 datasets/nvidia_SWE-Zero-openhands-trajectories/metadata.json create mode 100644 datasets/omniact/metadata.json delete mode 100644 datasets/openhands/api.py create mode 100644 datasets/openhands/metadata.json delete mode 100644 datasets/openresearcher/api.py create mode 100644 datasets/openresearcher/metadata.json create mode 100644 datasets/openthoughts_tb_dev/metadata.json create mode 100644 datasets/orca_agentinstruct/metadata.json delete mode 100644 datasets/scale_swe_distilled/api.py create mode 100644 datasets/scale_swe_distilled/metadata.json create mode 100644 datasets/screenagent/metadata.json delete mode 100644 datasets/swe-gym_openhands_sampled_trajectories/api.py create mode 100644 datasets/swe-gym_openhands_sampled_trajectories/metadata.json delete mode 100644 datasets/swe-play-trajectories/api.py create mode 100644 datasets/swe-play-trajectories/metadata.json delete mode 100644 datasets/swe-smith/api.py create mode 100644 datasets/swe-smith/metadata.json delete mode 100644 datasets/synatra/api.py create mode 100644 datasets/synatra/metadata.json delete mode 100644 datasets/toolmind/api.py create mode 100644 datasets/toolmind/metadata.json delete mode 100644 datasets/toucan_1_5m/api.py create mode 100644 datasets/toucan_1_5m/metadata.json delete mode 100644 datasets/turkingbench/api.py create mode 100644 datasets/turkingbench/metadata.json delete mode 100644 datasets/webarena_successful/api.py create mode 100644 datasets/webarena_successful/metadata.json delete mode 100644 datasets/weblinx/api.py create mode 100644 datasets/weblinx/metadata.json delete mode 100644 datasets/wonderbread/api.py create mode 100644 datasets/wonderbread/metadata.json diff --git a/.github/workflows/check_api_docstrings.yml b/.github/workflows/check_api_docstrings.yml index e58ca872..89747588 100644 --- a/.github/workflows/check_api_docstrings.yml +++ b/.github/workflows/check_api_docstrings.yml @@ -1,4 +1,4 @@ -name: Check Docstrings +name: Check Dataset Metadata on: push: @@ -9,7 +9,7 @@ on: - main jobs: - check_docstrings: + check_dataset_metadata: runs-on: ubuntu-latest steps: @@ -21,10 +21,12 @@ jobs: with: python-version: '3.12' - - name: Install ruff + - name: Install dependencies run: | - python -m pip install ruff + python -m pip install --upgrade pip + pip install pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Check for docstrings + - name: Check dataset metadata run: | - ruff check datasets/*/api.py --select D --ignore D100,D203,D213 + pytest tests/test_dataset_structure.py diff --git a/agents/openhands_v0/api.py b/agents/openhands_v0/api.py index 59087174..d58bc06c 100644 --- a/agents/openhands_v0/api.py +++ b/agents/openhands_v0/api.py @@ -1,6 +1,4 @@ -import importlib.util -import inspect -import os +from schema.dataset_metadata import custom_tool_map, load_dataset_metadata openhands_v0_default_tools = { "execute_bash": {"required": ["command"], "optional": ["is_input"]}, @@ -77,6 +75,20 @@ def check_exclude_tools(name: str, required: list, optional: list, exclude_apis: return True +def _schema_signature(tool) -> tuple[str, list[str], list[str]]: + parameters = tool.function.parameters or {} + properties = parameters.get("properties", {}) or {} + required = list(parameters.get("required", []) or []) + optional = [name for name in properties if name not in required] + args = [*required, *(f"{name}=None" for name in optional)] + return f"({', '.join(args)})", required, optional + + +def _tool_docstring(tool) -> str: + description = tool.function.description or "" + return "\n" + description + + def get_api_tool_description( dataset, exclude_apis=None, env="execute_ipython_cell", include_apis=None ): @@ -91,71 +103,48 @@ def get_api_tool_description( else: include_api_names = None - api_file_path = os.path.expanduser(f"datasets/{dataset}/api.py") - API_TOOL_DESCRIPTION = "" - if os.path.exists(api_file_path): - spec = importlib.util.spec_from_file_location("api", api_file_path) - api_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(api_module) - functions = inspect.getmembers(api_module, inspect.isfunction) - if include_api_names is not None: - api_names = {name for name, _ in functions} - missing_api_names = sorted(include_api_names - api_names) - if missing_api_names: - raise ValueError( - f"available_apis contains functions not found in {api_file_path}: " - f"{missing_api_names}" - ) - sigs = {} - for name, func in functions: - if include_api_names is not None and name not in include_api_names: - continue - docstring = "\n" + (inspect.getdoc(func) or "") - sig = inspect.signature(func) - required = [] - optional = [] - for arg_name, param in sig.parameters.items(): - if param.default is inspect.Parameter.empty: - if arg_name == "xpath" or arg_name == "element_id": - arg_name = "bid" - if arg_name not in required: - required.append(arg_name) - else: - optional.append(arg_name) - if name in openhands_v0_default_tools and check_exclude_openhands_v0_default_tools( - name, sig, required, optional - ): - # print(f"excluded {name}", file=sys.stderr) - continue - if name in exclude_apis and check_exclude_tools(name, required, optional, exclude_apis): - # print(f"excluded {name}", file=sys.stderr) - continue - docstring = f"{name}{sig}" + docstring.replace("\n", "\n ") + "\n\n" - API_TOOL_DESCRIPTION += docstring - sigs[name] = {"required": required, "optional": optional} - if not API_TOOL_DESCRIPTION: - return "", {} - if exclude_apis: - also = "also " - else: - also = "" - prefixes = [ - f"The following pre-defined functions are {also}available in {env}. ", - f"The environment {env} {also}provides the following pre-defined functions: ", - f"In {env}, you can {also}use the following pre-defined functions: ", - f"Available functions in {env}: ", - f"The following functions are {also}defined and ready for use in {env}: ", - f"Note that {env} {also}supports the following pre-defined functions: ", - f"Below is a list of functions you can {also}use in the {env} environment. ", - f"The toolkit for {env} {also}contains the following functions. ", - ] - API_TOOL_DESCRIPTION = prefixes[0] + "\n\n" + API_TOOL_DESCRIPTION - API_TOOL_DESCRIPTION = API_TOOL_DESCRIPTION.replace("xpath", "bid").replace( - "element_id", "bid" - ) - return API_TOOL_DESCRIPTION, sigs - else: + metadata = load_dataset_metadata(dataset) + tools = custom_tool_map(metadata) + if include_api_names is not None: + missing_api_names = sorted(include_api_names - set(tools)) + if missing_api_names: + raise ValueError( + f"available_apis contains functions not found in metadata.json for " + f"{dataset}: {missing_api_names}" + ) + + api_tool_description = "" + sigs = {} + for name, tool in sorted(tools.items()): + if include_api_names is not None and name not in include_api_names: + continue + sig, required, optional = _schema_signature(tool) + if name in openhands_v0_default_tools and check_exclude_openhands_v0_default_tools( + name, sig, required, optional + ): + continue + if name in exclude_apis and check_exclude_tools(name, required, optional, exclude_apis): + continue + docstring = f"{name}{sig}" + _tool_docstring(tool).replace("\n", "\n ") + "\n\n" + api_tool_description += docstring + sigs[name] = {"required": required, "optional": optional} + + if not api_tool_description: return "", {} + also = "also " if exclude_apis else "" + prefixes = [ + f"The following pre-defined functions are {also}available in {env}. ", + f"The environment {env} {also}provides the following pre-defined functions: ", + f"In {env}, you can {also}use the following pre-defined functions: ", + f"Available functions in {env}: ", + f"The following functions are {also}defined and ready for use in {env}: ", + f"Note that {env} {also}supports the following pre-defined functions: ", + f"Below is a list of functions you can {also}use in the {env} environment. ", + f"The toolkit for {env} {also}contains the following functions. ", + ] + api_tool_description = prefixes[0] + "\n\n" + api_tool_description + api_tool_description = api_tool_description.replace("xpath", "bid").replace("element_id", "bid") + return api_tool_description, sigs def get_language_descriptions(languages): diff --git a/agents/openhands_v0/convert_api_to_mcp.py b/agents/openhands_v0/convert_api_to_mcp.py index 1b64984b..501145dd 100644 --- a/agents/openhands_v0/convert_api_to_mcp.py +++ b/agents/openhands_v0/convert_api_to_mcp.py @@ -1,6 +1,4 @@ -import importlib.util import inspect -import os import textwrap from typing import ( Any, @@ -10,6 +8,8 @@ from pydantic import TypeAdapter +from schema.dataset_metadata import custom_tool_map, load_dataset_metadata + def json_type_from_py(py_t: Any) -> dict: """Generate JSON schema from Python type using Pydantic's TypeAdapter.""" @@ -121,18 +121,10 @@ def tool_from_function( def get_api_tools(dataset) -> dict: - api_file_path = os.path.expanduser(f"datasets/{dataset}/api.py") - if os.path.exists(api_file_path): - api_tools = {} - spec = importlib.util.spec_from_file_location("api", api_file_path) - api_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(api_module) - functions = inspect.getmembers(api_module, inspect.isfunction) - for name, func in functions: - api_tools[name] = tool_from_function(func) - return api_tools - else: - return {} + metadata = load_dataset_metadata(dataset) + return { + name: tool.model_dump(exclude_none=True) for name, tool in custom_tool_map(metadata).items() + } def language_tool_placeholder(code: str): diff --git a/agents/sweagent/api.py b/agents/sweagent/api.py index 9d31073d..92f22a2b 100644 --- a/agents/sweagent/api.py +++ b/agents/sweagent/api.py @@ -1,6 +1,4 @@ -import importlib.util -import inspect -import os +from schema.dataset_metadata import custom_tool_map, load_dataset_metadata sweagent_default_tools = { "bash": {"required": ["command"], "optional": []}, @@ -52,6 +50,20 @@ def check_exclude_tools(name: str, required: list, optional: list, exclude_apis: return True +def _schema_signature(tool) -> tuple[str, list[str], list[str]]: + parameters = tool.function.parameters or {} + properties = parameters.get("properties", {}) or {} + required = list(parameters.get("required", []) or []) + optional = [name for name in properties if name not in required] + args = [*required, *(f"{name}=None" for name in optional)] + return f"({', '.join(args)})", required, optional + + +def _tool_docstring(tool) -> str: + description = tool.function.description or "" + return "\n" + description + + def get_api_tool_description(dataset, exclude_apis=None, env="bash", include_apis=None): if exclude_apis is None: exclude_apis = {} @@ -64,66 +76,45 @@ def get_api_tool_description(dataset, exclude_apis=None, env="bash", include_api else: include_api_names = None - api_file_path = os.path.expanduser(f"datasets/{dataset}/api.py") - API_TOOL_DESCRIPTION = "" - if os.path.exists(api_file_path): - spec = importlib.util.spec_from_file_location("api", api_file_path) - api_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(api_module) - functions = inspect.getmembers(api_module, inspect.isfunction) - if include_api_names is not None: - api_names = {name for name, _ in functions} - missing_api_names = sorted(include_api_names - api_names) - if missing_api_names: - raise ValueError( - f"available_apis contains functions not found in {api_file_path}: " - f"{missing_api_names}" - ) - sigs = {} - for name, func in functions: - if include_api_names is not None and name not in include_api_names: - continue - docstring = "\n" + (inspect.getdoc(func) or "") - sig = inspect.signature(func) - required = [] - optional = [] - for arg_name, param in sig.parameters.items(): - if param.default is inspect.Parameter.empty: - if arg_name not in required: - required.append(arg_name) - else: - optional.append(arg_name) - if name in sweagent_default_tools and check_exclude_sweagent_default_tools( - name, sig, required, optional - ): - # print(f"excluded {name}") - continue - if name in exclude_apis and check_exclude_tools(name, required, optional, exclude_apis): - # print(f"excluded {name}") - continue - docstring = f"{name}{sig}" + docstring.replace("\n", "\n ") + "\n\n" - API_TOOL_DESCRIPTION += docstring - sigs[name] = {"required": required, "optional": optional} - if not API_TOOL_DESCRIPTION: - return "", {} - if exclude_apis: - also = "also " - else: - also = "" - prefixes = [ - f"The following pre-defined functions are {also}available in {env}. ", - f"The environment {env} {also}provides the following pre-defined functions: ", - f"In {env}, you can {also}use the following pre-defined functions: ", - f"Available functions in {env}: ", - f"The following functions are {also}defined and ready for use in {env}: ", - f"Note that {env} {also}supports the following pre-defined functions: ", - f"Below is a list of functions you can {also}use in the {env} environment. ", - f"The toolkit for {env} {also}contains the following functions. ", - ] - API_TOOL_DESCRIPTION = prefixes[0] + "\n\n" + API_TOOL_DESCRIPTION - API_TOOL_DESCRIPTION = API_TOOL_DESCRIPTION.replace("xpath", "bid").replace( - "element_id", "bid" - ) - return API_TOOL_DESCRIPTION, sigs - else: + metadata = load_dataset_metadata(dataset) + tools = custom_tool_map(metadata) + if include_api_names is not None: + missing_api_names = sorted(include_api_names - set(tools)) + if missing_api_names: + raise ValueError( + f"available_apis contains functions not found in metadata.json for " + f"{dataset}: {missing_api_names}" + ) + + api_tool_description = "" + sigs = {} + for name, tool in sorted(tools.items()): + if include_api_names is not None and name not in include_api_names: + continue + sig, required, optional = _schema_signature(tool) + if name in sweagent_default_tools and check_exclude_sweagent_default_tools( + name, sig, required, optional + ): + continue + if name in exclude_apis and check_exclude_tools(name, required, optional, exclude_apis): + continue + docstring = f"{name}{sig}" + _tool_docstring(tool).replace("\n", "\n ") + "\n\n" + api_tool_description += docstring + sigs[name] = {"required": required, "optional": optional} + + if not api_tool_description: return "", {} + also = "also " if exclude_apis else "" + prefixes = [ + f"The following pre-defined functions are {also}available in {env}. ", + f"The environment {env} {also}provides the following pre-defined functions: ", + f"In {env}, you can {also}use the following pre-defined functions: ", + f"Available functions in {env}: ", + f"The following functions are {also}defined and ready for use in {env}: ", + f"Note that {env} {also}supports the following pre-defined functions: ", + f"Below is a list of functions you can {also}use in the {env} environment. ", + f"The toolkit for {env} {also}contains the following functions. ", + ] + api_tool_description = prefixes[0] + "\n\n" + api_tool_description + api_tool_description = api_tool_description.replace("xpath", "bid").replace("element_id", "bid") + return api_tool_description, sigs diff --git a/datasets/CharlieDreemur_OpenManus-RL/api.py b/datasets/CharlieDreemur_OpenManus-RL/api.py deleted file mode 100644 index ab649c57..00000000 --- a/datasets/CharlieDreemur_OpenManus-RL/api.py +++ /dev/null @@ -1,125 +0,0 @@ -from typing import Any - - -def perform_action(action: str) -> dict: - """Execute a text action in an interactive environment. - - Args: - ---- - action: The environment action to perform, such as "go to desk 1". - - """ - pass - - -def get_search_movie(movie_name: Any) -> dict: - """Search for a movie by name and return basic details.""" - pass - - -def get_movie_details(movie_id: Any) -> dict: - """Get detailed information about a movie by ID.""" - pass - - -def get_movie_production_companies(movie_id: Any) -> dict: - """Get the production companies of a movie by its ID.""" - pass - - -def get_movie_production_countries(movie_id: Any) -> dict: - """Get the production countries of a movie by its ID.""" - pass - - -def get_movie_cast(movie_id: Any) -> dict: - """Retrieve the top cast members from a movie by its ID.""" - pass - - -def get_movie_crew(movie_id: Any) -> dict: - """Retrieve crew members from a movie by its ID.""" - pass - - -def get_movie_keywords(movie_id: Any) -> dict: - """Get the keywords associated with a movie by ID.""" - pass - - -def get_search_person(person_name: Any) -> dict: - """Search for a person by name.""" - pass - - -def get_person_details(person_id: Any) -> dict: - """Get detailed information about a person by ID.""" - pass - - -def get_person_cast(person_id: Any) -> dict: - """Retrieve movie cast roles for a person by their ID.""" - pass - - -def get_person_crew(person_id: Any) -> dict: - """Retrieve movie crew roles for a person by their ID.""" - pass - - -def get_person_external_ids(person_id: Any) -> dict: - """Get the external IDs for a person by ID.""" - pass - - -def get_movie_alternative_titles(movie_id: Any) -> dict: - """Get alternative titles for a movie by ID.""" - pass - - -def get_movie_translation(movie_id: Any) -> dict: - """Get description translations for a movie by ID.""" - pass - - -def check_valid_actions() -> dict: - """Get supported actions for the current tool.""" - pass - - -def weather_get_120_hour_forecast_for_weather( - lat: Any, - lon: Any, - lang: Any = None, - hours: Any = None, - units: Any = None, -) -> dict: - """Return a weather forecast for up to 120 hours. - - Original tool name: weather.get_120_hour_forecast_for_weather. - """ - pass - - -def pharmacies_de_garde_nc_health_for_pharmacies_de_garde_nc() -> dict: - """Return the health status of the Pharmacies de garde NC application. - - Original tool name: pharmacies_de_garde_nc.health_for_pharmacies_de_garde_nc. - """ - pass - - -def pharmacies_de_garde_nc_all_for_pharmacies_de_garde_nc() -> dict: - """Return pharmacies de garde in Nouvelle-Calédonie. - - Original tool name: pharmacies_de_garde_nc.all_for_pharmacies_de_garde_nc. - """ - pass - - -def app_store_new_free_ios_apps_for_app_store() -> dict: - """Get a list of new free iOS apps. - - Original tool name: app_store.new_free_ios_apps_for_app_store. - """ - pass diff --git a/datasets/CharlieDreemur_OpenManus-RL/metadata.json b/datasets/CharlieDreemur_OpenManus-RL/metadata.json new file mode 100644 index 00000000..dab281a2 --- /dev/null +++ b/datasets/CharlieDreemur_OpenManus-RL/metadata.json @@ -0,0 +1,273 @@ +{ + "custom_tools": [ + { + "type": "function", + "function": { + "name": "get_movie_production_companies", + "description": "Get the production companies of a movie by its ID.", + "parameters": { + "type": "object", + "properties": { + "movie_id": {} + }, + "additionalProperties": false, + "required": [ + "movie_id" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "get_search_movie", + "description": "Search for a movie by name and return basic details.", + "parameters": { + "type": "object", + "properties": { + "movie_name": {} + }, + "additionalProperties": false, + "required": [ + "movie_name" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "perform_action", + "description": "Execute a text action in an interactive environment.\n\nArgs:\n----\n action: The environment action to perform, such as \"go to desk 1\".", + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "action" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "pharmacies_de_garde_nc_all_for_pharmacies_de_garde_nc", + "description": "Return pharmacies de garde in Nouvelle-Calédonie.\n\nOriginal tool name: pharmacies_de_garde_nc.all_for_pharmacies_de_garde_nc.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "pharmacies_de_garde_nc_health_for_pharmacies_de_garde_nc", + "description": "Return the health status of the Pharmacies de garde NC application.\n\nOriginal tool name: pharmacies_de_garde_nc.health_for_pharmacies_de_garde_nc.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "weather_get_120_hour_forecast_for_weather", + "description": "Return a weather forecast for up to 120 hours.\n\nOriginal tool name: weather.get_120_hour_forecast_for_weather.", + "parameters": { + "type": "object", + "properties": { + "lat": {}, + "lon": {}, + "lang": {}, + "hours": {}, + "units": {} + }, + "additionalProperties": false, + "required": [ + "lat", + "lon" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "app_store_new_free_ios_apps_for_app_store", + "description": "Dataset tool app_store_new_free_ios_apps_for_app_store.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "check_valid_actions", + "description": "Dataset tool check_valid_actions.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_movie_alternative_titles", + "description": "Dataset tool get_movie_alternative_titles.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_movie_cast", + "description": "Dataset tool get_movie_cast.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_movie_crew", + "description": "Dataset tool get_movie_crew.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_movie_details", + "description": "Dataset tool get_movie_details.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_movie_keywords", + "description": "Dataset tool get_movie_keywords.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_movie_production_countries", + "description": "Dataset tool get_movie_production_countries.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_movie_translation", + "description": "Dataset tool get_movie_translation.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_person_cast", + "description": "Dataset tool get_person_cast.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_person_crew", + "description": "Dataset tool get_person_crew.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_person_details", + "description": "Dataset tool get_person_details.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_person_external_ids", + "description": "Dataset tool get_person_external_ids.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_search_person", + "description": "Dataset tool get_search_person.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + } + ], + "code_enabled": [], + "browser_enabled": false +} diff --git a/datasets/SALT-NLP_SWE-chat/api.py b/datasets/SALT-NLP_SWE-chat/api.py deleted file mode 100644 index 47fcfff2..00000000 --- a/datasets/SALT-NLP_SWE-chat/api.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Any - - -def str_replace_editor( - command: str, - path: str, - file_text: str | None = None, - old_str: str | None = None, - new_str: str | None = None, - insert_line: int | None = None, - view_range: list | None = None, -) -> None: - """View, create, and edit files with a custom editing tool. - - Args: - ---- - command: One of `view`, `create`, `str_replace`, `insert`, or `undo_edit`. - path: Absolute path to the target file or directory. - file_text: Content for `create` commands. - old_str: Existing text for `str_replace` commands. - new_str: Replacement text or inserted text. - insert_line: Line after which to insert text. - view_range: Optional `[start_line, end_line]` range to view. - - """ - pass - - -def think(thought: str) -> None: - """Record a private reasoning step. - - Args: - ---- - thought: The model's reasoning trace. - - """ - pass - - -def generic_tool(tool_name: str, tool_input: dict[str, Any], content: str | None = None) -> None: - """Represent a source-specific coding-agent tool call. - - Args: - ---- - tool_name: Original SWE-chat tool name. - tool_input: Parsed tool input parameters. - content: Raw tool-call content when no structured input is available. - - """ - pass diff --git a/datasets/SALT-NLP_SWE-chat/metadata.json b/datasets/SALT-NLP_SWE-chat/metadata.json new file mode 100644 index 00000000..f724f548 --- /dev/null +++ b/datasets/SALT-NLP_SWE-chat/metadata.json @@ -0,0 +1,32 @@ +{ + "custom_tools": [ + { + "type": "function", + "function": { + "name": "generic_tool", + "description": "Represent a source-specific coding-agent tool call.\n\nArgs:\n----\n tool_name: Original SWE-chat tool name.\n tool_input: Parsed tool input parameters.\n content: Raw tool-call content when no structured input is available.", + "parameters": { + "type": "object", + "properties": { + "tool_name": { + "type": "string" + }, + "tool_input": { + "type": "object" + }, + "content": {} + }, + "additionalProperties": false, + "required": [ + "tool_name", + "tool_input" + ] + } + } + } + ], + "code_enabled": [ + "bash" + ], + "browser_enabled": false +} diff --git a/datasets/agenttuning_alfworld/api.py b/datasets/agenttuning_alfworld/api.py deleted file mode 100644 index 39c26889..00000000 --- a/datasets/agenttuning_alfworld/api.py +++ /dev/null @@ -1,184 +0,0 @@ -def go(location: str): - """Move to the specified location. - - Args: - location (str): The target location to move to. - - Example: - go("bed 1") - - """ - pass - - -def take(item: str, source: str): - """Pick up an item from a specified source location. - - Args: - item (str): The item to pick up. - source (str): The location from which to take the item. - - Example: - take("laptop 1", "diningtable 1") - - """ - pass - - -def put(item: str, target: str): - """Place an item onto or into a specified target. - - Args: - item (str): The item to place. - target (str): The surface or container to place the item in/on. - - Example: - put("laptop 1", "bed 1") - - """ - pass - - -def open(obj: str): - """Open a specified container or object. - - Args: - obj (str): The object to open (e.g., drawer, door). - - Example: - open("drawer 1") - - """ - pass - - -def heat(item: str, appliance: str): - """Heat an item using a specified appliance. - - Args: - item (str): The item to be heated (e.g., "plate 1"). - appliance (str): The appliance to use for heating (e.g., "microwave 1"). - - Example: - heat("plate 1", "microwave 1") - - """ - pass - - -def examine(obj: str): - """Inspect or look closely at an object in the environment. - - Args: - obj (str): The object to examine (e.g., "dresser 1"). - - Example: - examine("dresser 1") - - """ - pass - - -def cool(item: str, appliance: str): - """Cool an item using a specified appliance. - - Args: - item (str): The item to be cooled (e.g., "plate 1"). - appliance (str): The appliance to use for cooling (e.g., "fridge 1"). - - Example: - cool("plate 1", "fridge 1") - - """ - pass - - -def use(obj: str): - """Use or activate a specified object or appliance. - - Args: - obj (str): The object to use (e.g., "desklamp 1"). - - Example: - use("desklamp 1") - - """ - pass - - -def close(obj: str): - """Close a specified container or object. - - Args: - obj (str): The object to close (e.g., "fridge 1"). - - Example: - close("fridge 1") - - """ - pass - - -def clean(item: str, appliance: str): - """Clean an item using a specified appliance. - - Args: - item (str): The item to be cleaned (e.g., "ladle 2"). - appliance (str): The appliance used for cleaning (e.g., "sinkbasin 1"). - - Example: - clean("ladle 2", "sinkbasin 1") - - """ - pass - - -def report_problem(obj: str): - """Report an issue with a specified object in the environment. - - Args: - obj (str): The object with a problem (e.g., "toilet 1"). - - Example: - report_problem("toilet 1") - - """ - pass - - -def inventory(): - """Check currently held items. - - This function retrieves and lists the objects currently in possession. - - Example: - inventory() - - """ - pass - - -def look(): - """Survey the surroundings to get a description of the current environment. - - This function allows the agent to observe visible objects and locations nearby. - - Example: - look() - - """ - pass - - -def look_at_under(item: str, reference: str): - """Look closely at an item that is located under a specified object. - - Args: - item (str): The item to examine (e.g., "cellphone 1"). - reference (str): The object under which the item is located (e.g., "desklamp 1"). - - Example: - look_at_under("cellphone 1", "desklamp 1") - - """ - pass diff --git a/datasets/agenttuning_db/metadata.json b/datasets/agenttuning_db/metadata.json new file mode 100644 index 00000000..dfa26e78 --- /dev/null +++ b/datasets/agenttuning_db/metadata.json @@ -0,0 +1,5 @@ +{ + "custom_tools": [], + "code_enabled": [], + "browser_enabled": false +} diff --git a/datasets/agenttuning_kg/api.py b/datasets/agenttuning_kg/api.py deleted file mode 100644 index 2a82ca87..00000000 --- a/datasets/agenttuning_kg/api.py +++ /dev/null @@ -1,78 +0,0 @@ -def get_relations(variable: str): - """Get all relations connected to an entity or variable in the knowledge base. - - This function helps to explore the knowledge graph by retrieving all relations - (i.e., edges) that are associated with the given variable, which can be either - a concrete entity (e.g., "Barack Obama") or a variable placeholder (e.g., "#0"). - - Example: get_relations("Barack Obama") - """ - pass - - -def get_neighbors(variable: str, relation: str): - """Get all entities connected to a variable via a specific relation. - - This function retrieves a new variable containing all entities that are - connected to the input variable by the given relation. This is typically - used after get_relations to determine which relation to follow. - - Example: get_neighbors("Barack Obama", "people.person.profession") - """ - pass - - -def intersection(variable1: str, variable2: str): - """Compute the intersection of two variables. - - This function returns a new variable that includes only the entities - shared between the two input variables. The input variables must be - of the same type. - - Example: intersection("#1", "#2") - """ - pass - - -def get_attributes(variable: str): - """Get all numerical attributes of a variable. - - This function helps to identify which attributes can be used in a - superlative query (e.g., max/min age). Only use this when a question - involves ranking or finding extremums. - - Example: get_attributes("#3") - """ - pass - - -def argmax(variable: str, attribute: str): - """Return the entity with the maximum value of the given attribute. - - Use this function to find the entity with the highest value for the - specified attribute within a variable. Requires attributes to be known. - - Example: argmax("#2", "age") - """ - pass - - -def argmin(variable: str, attribute: str): - """Return the entity with the minimum value of the given attribute. - - Use this function to find the entity with the lowest value for the - specified attribute within a variable. Requires attributes to be known. - - Example: argmin("#2", "age") - """ - pass - - -def count(variable: str): - """Count the number of entities in a variable. - - Returns the number of distinct entities represented by the variable. - - Example: count("#4") - """ - pass diff --git a/datasets/agenttuning_webshop/api.py b/datasets/agenttuning_webshop/api.py deleted file mode 100644 index b1aab5eb..00000000 --- a/datasets/agenttuning_webshop/api.py +++ /dev/null @@ -1,25 +0,0 @@ -def search(keywords: str): - """Perform a search on the web interface using the specified keywords. - - Args: - keywords (str): The search query string. - - Example: - search("3.25 ounce (pack of 3) protein serving jerky price < 50.00") - - """ - pass - - -def click(element: str): - """Click an element on the webpage by its visible label or ID. - - Args: - element (str): The label or identifier of the clickable item. - - Example: - click("B0977H69D1") - click("Buy Now") - - """ - pass diff --git a/datasets/allenai_Sera-4.6-Lite-T2/api.py b/datasets/allenai_Sera-4.6-Lite-T2/api.py deleted file mode 100644 index d98981f0..00000000 --- a/datasets/allenai_Sera-4.6-Lite-T2/api.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import List, Literal, Optional - - -def str_replace_editor( - command: Literal["view", "create", "str_replace", "insert", "undo_edit"], - path: str, - file_text: Optional[str] = None, - old_str: Optional[str] = None, - new_str: Optional[str] = None, - insert_line: Optional[int] = None, - view_range: Optional[List[int]] = None, -) -> None: - """View, create, and edit files with the OpenHands editor tool.""" - pass diff --git a/datasets/allenai_Sera-4.6-Lite-T2/metadata.json b/datasets/allenai_Sera-4.6-Lite-T2/metadata.json new file mode 100644 index 00000000..41212e45 --- /dev/null +++ b/datasets/allenai_Sera-4.6-Lite-T2/metadata.json @@ -0,0 +1,7 @@ +{ + "custom_tools": [], + "code_enabled": [ + "bash" + ], + "browser_enabled": false +} diff --git a/datasets/android_in_the_wild/api.py b/datasets/android_in_the_wild/api.py deleted file mode 100644 index 9031f58b..00000000 --- a/datasets/android_in_the_wild/api.py +++ /dev/null @@ -1,45 +0,0 @@ -def touch_and_lift(x0: float, y0: float, x1: float, y1: float) -> None: - """Touch at the given x0, y0 coordinates and lift at x1, y1. - - Args: - ---- - x0 (float): The x coordinate to touch. - y0 (float): The y coordinate to touch. - x1 (float): The x coordinate to lift. - y1 (float): The y coordinate to lift. - - """ - pass - - -def type(text: str): - """Type given text through keyboard. - - Args: - ---- - text (str): the text to input. - - """ - pass - - -def press(key_name: str): - """Press a special key according the key name. - - Args: - ---- - key_name (str): go_back | go_home | enter, the key to press - - """ - pass - - -def end(succeeds: bool): - """Claim the end of the task with whether it is successfully completed. - - Args: - ---- - succeeds (bool): if the task is successful - - """ - pass diff --git a/datasets/android_in_the_wild/metadata.json b/datasets/android_in_the_wild/metadata.json new file mode 100644 index 00000000..1cbbdf2b --- /dev/null +++ b/datasets/android_in_the_wild/metadata.json @@ -0,0 +1,37 @@ +{ + "custom_tools": [ + { + "type": "function", + "function": { + "name": "touch_and_lift", + "description": "Touch at the given x0, y0 coordinates and lift at x1, y1.\n\nArgs:\n----\n x0 (float): The x coordinate to touch.\n y0 (float): The y coordinate to touch.\n x1 (float): The x coordinate to lift.\n y1 (float): The y coordinate to lift.", + "parameters": { + "type": "object", + "properties": { + "x0": { + "type": "number" + }, + "y0": { + "type": "number" + }, + "x1": { + "type": "number" + }, + "y1": { + "type": "number" + } + }, + "additionalProperties": false, + "required": [ + "x0", + "y0", + "x1", + "y1" + ] + } + } + } + ], + "code_enabled": [], + "browser_enabled": false +} diff --git a/datasets/androidcontrol/api.py b/datasets/androidcontrol/api.py deleted file mode 100644 index 23be529d..00000000 --- a/datasets/androidcontrol/api.py +++ /dev/null @@ -1,58 +0,0 @@ -def click(x: int, y: int) -> None: - """Click at the specified coordinates. - - Args: - ---- - x (int): The x coordinate to click. - y (int): The y coordinate to click. - - """ - pass - - -def scroll(direction: str) -> None: - """Scroll in the specified direction. - - Args: - ---- - direction (str): The direction to scroll. - - """ - pass - - -def input_text(text: str) -> None: - """Input text. - - Args: - ---- - text (str): The text to input. - - """ - pass - - -def navigate_home() -> None: - """Navigate to the home screen.""" - pass - - -def back() -> None: - """Navigate back.""" - pass - - -def open_app(app_name: str) -> None: - """Open the specified app. - - Args: - ---- - app_name (str): The name of the app to open. - - """ - pass - - -def wait() -> None: - """Wait for a short period of time.""" - pass diff --git a/datasets/androidcontrol/metadata.json b/datasets/androidcontrol/metadata.json new file mode 100644 index 00000000..5267f6ed --- /dev/null +++ b/datasets/androidcontrol/metadata.json @@ -0,0 +1,91 @@ +{ + "custom_tools": [ + { + "type": "function", + "function": { + "name": "back", + "description": "Navigate back.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "click", + "description": "Click at the specified coordinates.\n\nArgs:\n----\n x (int): The x coordinate to click.\n y (int): The y coordinate to click.", + "parameters": { + "type": "object", + "properties": { + "x": { + "type": "integer" + }, + "y": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "x", + "y" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "open_app", + "description": "Open the specified app.\n\nArgs:\n----\n app_name (str): The name of the app to open.", + "parameters": { + "type": "object", + "properties": { + "app_name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "app_name" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "scroll", + "description": "Scroll in the specified direction.\n\nArgs:\n----\n direction (str): The direction to scroll.", + "parameters": { + "type": "object", + "properties": { + "direction": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "direction" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "wait", + "description": "Wait for a short period of time.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + } + } + ], + "code_enabled": [], + "browser_enabled": true +} diff --git a/datasets/code_feedback/metadata.json b/datasets/code_feedback/metadata.json new file mode 100644 index 00000000..71eea4f8 --- /dev/null +++ b/datasets/code_feedback/metadata.json @@ -0,0 +1,7 @@ +{ + "custom_tools": [], + "code_enabled": [ + "python" + ], + "browser_enabled": false +} diff --git a/datasets/codeactinstruct/api.py b/datasets/codeactinstruct/api.py deleted file mode 100644 index 7f331889..00000000 --- a/datasets/codeactinstruct/api.py +++ /dev/null @@ -1,119 +0,0 @@ -def wikipedia_search(query: str) -> str: - """Search Wikipedia for a given query. - - This tool provides access to a vast collection of articles covering a wide range of topics. - It can be used to retrieve accurate and comprehensive information about specific keywords or topics. - - For example: wikipedia_search("Photosynthesis") - """ - pass - - -def put(object: str, receptacle: str) -> str: - """Put an object in/on a receptacle. - - This is used for interacting with a household environment. - - For example: put("mug 1", "desk 2") - """ - pass - - -def goto(receptacle: str) -> str: - """Go to a location of the receptacle. - - This is used for interacting with a household environment. - - For example: goto("drawer 1") - """ - pass - - -def take_from(object: str, receptacle: str) -> str: - """Take an object from a receptacle. - - This is used for interacting with a household environment. - - For example: take_from("mug 1", "shelf 2") - """ - pass - - -def open_receptacle(receptacle: str) -> str: - """Open a receptacle. - - This is used for interacting with a household environment. - - For example: open_receptacle("fridge 1") - """ - pass - - -def toggle(object_or_receptacle: str) -> str: - """Toggle an object or receptacle. - - This is used for interacting with a household environment. - - For example: toggle("light 2") - """ - pass - - -def close_receptacle(receptacle: str) -> str: - """Close a receptacle. - - This is used for interacting with a household environment. - - For example: close_receptacle("microwave 1") - """ - pass - - -def clean(object: str, receptacle: str) -> str: - """Clean an object with a receptacle. - - This is used for interacting with a household environment. - - For example: clean("cloth 1", "sinkbasin 1") - """ - pass - - -def heat(object: str, receptacle: str) -> str: - """Heat an object with a receptacle. - - This is used for interacting with a household environment. - - For example: heat("egg 1", "microwave 1") - """ - pass - - -def cool(object: str, receptacle: str) -> str: - """Cool an object with a receptacle. - - This is used for interacting with a household environment. - - For example: cool("bottle 1", "fridge 1") - """ - pass - - -def use(receptacle: str) -> str: - """Use a receptacle. - - This is used for interacting with a household environment. - - For example: use("lamp 1") - """ - pass - - -def look() -> str: - """Look around. It will return what you see in the room. - - This is used for interacting with a household environment. - - For example: look() - """ - pass diff --git a/datasets/codeactinstruct/metadata.json b/datasets/codeactinstruct/metadata.json new file mode 100644 index 00000000..360ce38f --- /dev/null +++ b/datasets/codeactinstruct/metadata.json @@ -0,0 +1,249 @@ +{ + "custom_tools": [ + { + "type": "function", + "function": { + "name": "clean", + "description": "Clean an object with a receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: clean(\"cloth 1\", \"sinkbasin 1\")", + "parameters": { + "type": "object", + "properties": { + "object": { + "type": "string" + }, + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "object", + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "close_receptacle", + "description": "Close a receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: close_receptacle(\"microwave 1\")", + "parameters": { + "type": "object", + "properties": { + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "cool", + "description": "Cool an object with a receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: cool(\"bottle 1\", \"fridge 1\")", + "parameters": { + "type": "object", + "properties": { + "object": { + "type": "string" + }, + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "object", + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "goto", + "description": "Go to a location of the receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: goto(\"drawer 1\")", + "parameters": { + "type": "object", + "properties": { + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "heat", + "description": "Heat an object with a receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: heat(\"egg 1\", \"microwave 1\")", + "parameters": { + "type": "object", + "properties": { + "object": { + "type": "string" + }, + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "object", + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "look", + "description": "Look around. It will return what you see in the room.\n\nThis is used for interacting with a household environment.\n\nFor example: look()", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "open_receptacle", + "description": "Open a receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: open_receptacle(\"fridge 1\")", + "parameters": { + "type": "object", + "properties": { + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "put", + "description": "Put an object in/on a receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: put(\"mug 1\", \"desk 2\")", + "parameters": { + "type": "object", + "properties": { + "object": { + "type": "string" + }, + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "object", + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "take_from", + "description": "Take an object from a receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: take_from(\"mug 1\", \"shelf 2\")", + "parameters": { + "type": "object", + "properties": { + "object": { + "type": "string" + }, + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "object", + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "toggle", + "description": "Toggle an object or receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: toggle(\"light 2\")", + "parameters": { + "type": "object", + "properties": { + "object_or_receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "object_or_receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "use", + "description": "Use a receptacle.\n\nThis is used for interacting with a household environment.\n\nFor example: use(\"lamp 1\")", + "parameters": { + "type": "object", + "properties": { + "receptacle": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "receptacle" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "wikipedia_search", + "description": "Search Wikipedia for a given query.\n\nThis tool provides access to a vast collection of articles covering a wide range of topics.\nIt can be used to retrieve accurate and comprehensive information about specific keywords or topics.\n\nFor example: wikipedia_search(\"Photosynthesis\")", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "query" + ] + } + } + } + ], + "code_enabled": [ + "python" + ], + "browser_enabled": false +} diff --git a/datasets/coderforge_preview/api.py b/datasets/coderforge_preview/api.py deleted file mode 100644 index 981748a8..00000000 --- a/datasets/coderforge_preview/api.py +++ /dev/null @@ -1,34 +0,0 @@ -def str_replace_editor( - command: str, - path: str, - file_text: str = None, - old_str: str = None, - new_str: str = None, - insert_line: int = None, - view_range: list = None, -) -> None: - """View, create, and edit files with this custom editing tool. - - Args: - ---- - command (str): The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`. - path (str): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`. - file_text (str): Required parameter of `create` command, with the content of the file to be created. - old_str (str): Required parameter of `str_replace` command containing the string in `path` to replace. - new_str (str): Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert. - insert_line (int): Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`. - view_range (list): Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file. - - """ - pass - - -def think(thought: str): - """Log a thought for reasoning. - - Args: - ---- - thought (str): The thought to log. - - """ - pass diff --git a/datasets/coderforge_preview/metadata.json b/datasets/coderforge_preview/metadata.json new file mode 100644 index 00000000..41212e45 --- /dev/null +++ b/datasets/coderforge_preview/metadata.json @@ -0,0 +1,7 @@ +{ + "custom_tools": [], + "code_enabled": [ + "bash" + ], + "browser_enabled": false +} diff --git a/datasets/codescout/metadata.json b/datasets/codescout/metadata.json new file mode 100644 index 00000000..41212e45 --- /dev/null +++ b/datasets/codescout/metadata.json @@ -0,0 +1,7 @@ +{ + "custom_tools": [], + "code_enabled": [ + "bash" + ], + "browser_enabled": false +} diff --git a/datasets/cognitivekernel_pro_sft/api.py b/datasets/cognitivekernel_pro_sft/api.py deleted file mode 100644 index 92e95ab3..00000000 --- a/datasets/cognitivekernel_pro_sft/api.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import Any - - -def web_agent(task: str) -> dict: - """Use a web browser agent to complete a web task.""" - pass - - -def file_agent(task: str, file_path_dict: dict | None = None) -> dict: - """Use a file-analysis agent to answer a task over local files.""" - pass - - -def stop( - output: Any = None, - log: Any = None, - answer: Any = None, - summary: Any = None, -) -> dict: - """Finalize a task with either source-specific final-answer signature.""" - pass - - -def ask_llm(query: str) -> str: - """Ask a language model for tasks that need no external tools.""" - pass - - -def simple_web_search(query: str) -> str: - """Run a quick web search for straightforward information needs.""" - pass - - -def load_file(file_name: str) -> str: - """Load a local file into the CognitiveKernel file environment.""" - pass - - -def read_text(file_name: str, page_id_list: list) -> str: - """Read selected file pages as text.""" - pass - - -def read_screenshot(file_name: str, page_id_list: list) -> str: - """Read selected file pages with screenshot-based processing.""" - pass - - -def search(file_name: str, key_word_list: list) -> str: - """Search a file for keywords and return matching pages.""" - pass diff --git a/datasets/cognitivekernel_pro_sft/metadata.json b/datasets/cognitivekernel_pro_sft/metadata.json new file mode 100644 index 00000000..723d99e5 --- /dev/null +++ b/datasets/cognitivekernel_pro_sft/metadata.json @@ -0,0 +1,123 @@ +{ + "custom_tools": [ + { + "type": "function", + "function": { + "name": "load_file", + "description": "Load a local file into the CognitiveKernel file environment.", + "parameters": { + "type": "object", + "properties": { + "file_name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "file_name" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "simple_web_search", + "description": "Run a quick web search for straightforward information needs.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "query" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "web_agent", + "description": "Use a web browser agent to complete a web task.", + "parameters": { + "type": "object", + "properties": { + "task": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "task" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "ask_llm", + "description": "Dataset tool ask_llm.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "file_agent", + "description": "Dataset tool file_agent.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "read_screenshot", + "description": "Dataset tool read_screenshot.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "read_text", + "description": "Dataset tool read_text.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "search", + "description": "Dataset tool search.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + } + ], + "code_enabled": [], + "browser_enabled": false +} diff --git a/datasets/dolci_instruct_sft_tool_use/api.py b/datasets/dolci_instruct_sft_tool_use/api.py deleted file mode 100644 index 55affa70..00000000 --- a/datasets/dolci_instruct_sft_tool_use/api.py +++ /dev/null @@ -1,121 +0,0 @@ -from typing import Any - - -def cell_density(dilution: Any = None, od: Any = None) -> dict: - """Provide a placeholder for the `cell_density` tool in the committed Dolci sample.""" - pass - - -def combinatorics_permutation_count(k: Any = None, n: Any = None) -> dict: - """Provide a placeholder for the `combinatorics_permutation_count` tool in the committed Dolci sample.""" - pass - - -def get_all_predictions(sort: Any = None) -> dict: - """Provide a placeholder for the `get_all_predictions` tool in the committed Dolci sample.""" - pass - - -def get_city_from_zipcode(zipcode: Any = None) -> dict: - """Provide a placeholder for the `get_city_from_zipcode` tool in the committed Dolci sample.""" - pass - - -def get_matches_on_a_specific_date(date: Any = None, utc_offset: Any = None) -> dict: - """Provide a placeholder for the `get_matches_on_a_specific_date` tool in the committed Dolci sample.""" - pass - - -def is_power_of_two(num: Any = None) -> dict: - """Provide a placeholder for the `is_power_of_two` tool in the committed Dolci sample.""" - pass - - -def laliga_standings(season: Any = None) -> dict: - """Provide a placeholder for the `laliga_standings` tool in the committed Dolci sample.""" - pass - - -def leaguepowerrankingrounds(seasonid: Any = None, tournamentid: Any = None) -> dict: - """Provide a placeholder for the `leaguepowerrankingrounds` tool in the committed Dolci sample.""" - pass - - -def match_details_by_id(match_id: Any = None) -> dict: - """Provide a placeholder for the `match_details_by_id` tool in the committed Dolci sample.""" - pass - - -def physics_final_velocity( - acceleration: Any = None, initial_velocity: Any = None, time: Any = None -) -> dict: - """Provide a placeholder for the `physics_final_velocity` tool in the committed Dolci sample.""" - pass - - -def reserve_hotel_room( - checkin_date: Any = None, - checkout_date: Any = None, - guest_id: Any = None, - nightly_rate: Any = None, - room_type: Any = None, -) -> dict: - """Provide a placeholder for the `reserve_hotel_room` tool in the committed Dolci sample.""" - pass - - -def schools(identifier: Any = None) -> dict: - """Provide a placeholder for the `schools` tool in the committed Dolci sample.""" - pass - - -def select_race_based_on_race_number(race_no: Any = None) -> dict: - """Provide a placeholder for the `select_race_based_on_race_number` tool in the committed Dolci sample.""" - pass - - -def weather_forecast_weather_api(days: Any = None, q: Any = None) -> dict: - """Provide a placeholder for the `weather_forecast_weather_api` tool in the committed Dolci sample.""" - pass - - -def calculate_calorie_intake( - weight_kg: Any = None, - height_cm: Any = None, - age: Any = None, - sex: Any = None, - activity_level: Any = None, - goal: Any = None, -) -> dict: - """Stub for the advertised Dolci tool.""" - pass - - -def can_attend_all_meetings(intervals: Any = None) -> dict: - """Stub for the advertised Dolci tool.""" - pass - - -def daily_match_list_all(date: Any = None) -> dict: - """Stub for the advertised Dolci tool.""" - pass - - -def get_ip_zipcode(ip: Any = None) -> dict: - """Stub for the advertised Dolci tool.""" - pass - - -def get_pokemon_move_info(pokemon_name: Any = None, move_name: Any = None) -> dict: - """Stub for the advertised Dolci tool.""" - pass - - -def predict_evolution_rate(species: Any = None, years: Any = None, model: Any = None) -> dict: - """Stub for the advertised Dolci tool.""" - pass - - -def weather_getweatherforecast() -> dict: - """Stub for the advertised Dolci tool.""" - pass diff --git a/datasets/dolci_instruct_sft_tool_use/metadata.json b/datasets/dolci_instruct_sft_tool_use/metadata.json new file mode 100644 index 00000000..d54d65fc --- /dev/null +++ b/datasets/dolci_instruct_sft_tool_use/metadata.json @@ -0,0 +1,297 @@ +{ + "custom_tools": [ + { + "type": "function", + "function": { + "name": "cell_density", + "description": "Provide a placeholder for the `cell_density` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "dilution": {}, + "od": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "combinatorics_permutation_count", + "description": "Provide a placeholder for the `combinatorics_permutation_count` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "k": {}, + "n": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "get_all_predictions", + "description": "Provide a placeholder for the `get_all_predictions` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "sort": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "get_city_from_zipcode", + "description": "Provide a placeholder for the `get_city_from_zipcode` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "zipcode": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "get_matches_on_a_specific_date", + "description": "Provide a placeholder for the `get_matches_on_a_specific_date` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "date": {}, + "utc_offset": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "is_power_of_two", + "description": "Provide a placeholder for the `is_power_of_two` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "num": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "laliga_standings", + "description": "Provide a placeholder for the `laliga_standings` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "season": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "leaguepowerrankingrounds", + "description": "Provide a placeholder for the `leaguepowerrankingrounds` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "seasonid": {}, + "tournamentid": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "match_details_by_id", + "description": "Provide a placeholder for the `match_details_by_id` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "match_id": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "physics_final_velocity", + "description": "Provide a placeholder for the `physics_final_velocity` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "acceleration": {}, + "initial_velocity": {}, + "time": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "reserve_hotel_room", + "description": "Provide a placeholder for the `reserve_hotel_room` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "checkin_date": {}, + "checkout_date": {}, + "guest_id": {}, + "nightly_rate": {}, + "room_type": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "schools", + "description": "Provide a placeholder for the `schools` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "identifier": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "select_race_based_on_race_number", + "description": "Provide a placeholder for the `select_race_based_on_race_number` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "race_no": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "weather_forecast_weather_api", + "description": "Provide a placeholder for the `weather_forecast_weather_api` tool in the committed Dolci sample.", + "parameters": { + "type": "object", + "properties": { + "days": {}, + "q": {} + }, + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "calculate_calorie_intake", + "description": "Dataset tool calculate_calorie_intake.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "can_attend_all_meetings", + "description": "Dataset tool can_attend_all_meetings.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "daily_match_list_all", + "description": "Dataset tool daily_match_list_all.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_ip_zipcode", + "description": "Dataset tool get_ip_zipcode.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "get_pokemon_move_info", + "description": "Dataset tool get_pokemon_move_info.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "predict_evolution_rate", + "description": "Dataset tool predict_evolution_rate.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + }, + { + "type": "function", + "function": { + "name": "weather_getweatherforecast", + "description": "Dataset tool weather_getweatherforecast.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": true + } + } + } + ], + "code_enabled": [], + "browser_enabled": false +} diff --git a/datasets/eto/metadata.json b/datasets/eto/metadata.json new file mode 100644 index 00000000..dfa26e78 --- /dev/null +++ b/datasets/eto/metadata.json @@ -0,0 +1,5 @@ +{ + "custom_tools": [], + "code_enabled": [], + "browser_enabled": false +} diff --git a/datasets/gair_davinci_dev/api.py b/datasets/gair_davinci_dev/api.py deleted file mode 100644 index b3dabf9e..00000000 --- a/datasets/gair_davinci_dev/api.py +++ /dev/null @@ -1,26 +0,0 @@ -from typing import Optional - - -def str_replace_editor( - command: str, - path: str, - file_text: Optional[str] = None, - old_str: Optional[str] = None, - new_str: Optional[str] = None, - insert_line: Optional[int] = None, - view_range: Optional[list[int]] = None, -): - """View, create, and edit files with a custom editing tool. - - Args: - ---- - command: Editor command. Allowed values include view, create, str_replace, insert, and undo_edit. - path: Absolute path to a file or directory. - file_text: File content for create commands. - old_str: Exact string to replace for str_replace commands. - new_str: Replacement or inserted string. - insert_line: Line number after which to insert new_str. - view_range: Optional line range to view. - - """ - pass diff --git a/datasets/gair_davinci_dev/metadata.json b/datasets/gair_davinci_dev/metadata.json new file mode 100644 index 00000000..41212e45 --- /dev/null +++ b/datasets/gair_davinci_dev/metadata.json @@ -0,0 +1,7 @@ +{ + "custom_tools": [], + "code_enabled": [ + "bash" + ], + "browser_enabled": false +} diff --git a/datasets/go-browse-wa/api.py b/datasets/go-browse-wa/api.py deleted file mode 100644 index c0d7edf3..00000000 --- a/datasets/go-browse-wa/api.py +++ /dev/null @@ -1,180 +0,0 @@ -from typing import List, Literal, Union - - -def noop(wait_ms: float = 1000) -> None: - """Do nothing, and optionally wait for the given time (in milliseconds). - - Args: - ---- - wait_ms (float): Time to wait in milliseconds. Defaults to 1000ms. - - """ - pass - - -def scroll(delta_x: float, delta_y: float) -> None: - """Scroll horizontally and vertically. - - Amounts in pixels, positive for right or down scrolling, - negative for left or up scrolling. Dispatches a wheel event. - - Args: - ---- - delta_x (float): The distance to scroll horizontally in pixels. - delta_y (float): The distance to scroll vertically in pixels. - - """ - pass - - -def fill(bid: str, value: str) -> None: - """Fill out a form field. - - It focuses the element and triggers an input event with the entered text. - Works for ,