diff --git a/README.md b/README.md
index 06b90381a..4ff9e72db 100644
--- a/README.md
+++ b/README.md
@@ -128,7 +128,7 @@
 mkdir acontext_server && cd acontext_server
 acontext server up
 ```
 
-> Make sure your LLM has the ability to [call tools](https://platform.openai.com/docs/guides/function-calling). By default, Acontext will use `gpt-4.1`.
+> Make sure your LLM has the ability to [call tools](https://platform.openai.com/docs/guides/function-calling). By default, Acontext will use `gpt-4.1`. You can also use another LLM provider by setting `LLM_SDK` to `anthropic` in your `.env` file, or any OpenAI-compatible provider (e.g., MiniMax, DeepSeek) by setting `LLM_BASE_URL`.
 
 `acontext server up` will create/use `.env` and `config.yaml` for Acontext, and create a `db` folder to persist data.
diff --git a/docs/content/docs/(guides)/settings/core.mdx b/docs/content/docs/(guides)/settings/core.mdx
index d5f37f3b5..753403817 100644
--- a/docs/content/docs/(guides)/settings/core.mdx
+++ b/docs/content/docs/(guides)/settings/core.mdx
@@ -29,6 +29,10 @@
 Default model identifier for LLM operations. Examples: `gpt-4`, `gpt-3.5-turbo`,
 
 Timeout in seconds for LLM API responses. Increase for longer operations.
 
+
+`LLM_STRIP_TAGS`: comma-separated list of XML-style tag names to strip from LLM responses. Many reasoning models wrap chain-of-thought in tags like `<think>...</think>`. Set to `think` to strip those blocks, or `think,reasoning` for multiple tag types. Default is empty (no stripping; original content is preserved).
+
+
 
 ### Embedding Configuration
@@ -89,6 +93,21 @@
 BLOCK_EMBEDDING_PROVIDER=openai
 BLOCK_EMBEDDING_API_KEY=sk-your-openai-key-for-embeddings
 ```
 
+```bash title="MiniMax Setup"
+# MiniMax is OpenAI-compatible: use LLM_SDK=openai with a custom base URL
+LLM_API_KEY=your-minimax-api-key
+LLM_SDK=openai
+LLM_BASE_URL=https://api.minimax.io/v1
+LLM_SIMPLE_MODEL=MiniMax-M2.7
+
+# Strip reasoning tags from MiniMax responses (optional)
+LLM_STRIP_TAGS=think
+
+# Keep OpenAI for embeddings (recommended)
+BLOCK_EMBEDDING_PROVIDER=openai
+BLOCK_EMBEDDING_API_KEY=sk-your-openai-key-for-embeddings
+```
+
 ```bash title="Custom Endpoints"
 # Custom LLM endpoint (e.g., Azure OpenAI)
 LLM_API_KEY=your-azure-key
diff --git a/src/server/core/acontext_core/llm/complete/openai_sdk.py b/src/server/core/acontext_core/llm/complete/openai_sdk.py
index fdefff8aa..6a4bef7c7 100644
--- a/src/server/core/acontext_core/llm/complete/openai_sdk.py
+++ b/src/server/core/acontext_core/llm/complete/openai_sdk.py
@@ -1,4 +1,5 @@
 import json
+import re
 from typing import Optional
 from .clients import get_openai_async_client_instance
 from openai.types.chat import ChatCompletion
@@ -9,6 +10,23 @@
 from ...telemetry.log import get_wide_event
 
 
+def _strip_tags(text: str, tags: list[str]) -> str:
+    """Strip named XML-style tag blocks from model responses.
+
+    Many reasoning models (DeepSeek, QwQ, MiniMax, etc.) wrap their internal
+    chain-of-thought in tags like ``<think>...</think>``. This helper removes
+    the specified tag blocks so that downstream consumers receive only the
+    final answer.
+
+    Args:
+        text: The raw response text.
+        tags: Tag names to strip, e.g. ``["think", "reasoning"]``.
+ """ + for tag in tags: + text = re.sub(rf"<{tag}>[\s\S]*?\s*", "", text) + return text.strip() + + def convert_openai_tool_to_llm_tool(tool_body: ChatCompletionMessageToolCall) -> dict: return { "id": tool_body.id, @@ -90,20 +108,24 @@ async def openai_complete( else None ) + content = response.choices[0].message.content + if content and DEFAULT_CORE_CONFIG.llm_strip_tags: + content = _strip_tags(content, DEFAULT_CORE_CONFIG.llm_strip_tags) + llm_response = LLMResponse( role=response.choices[0].message.role, raw_response=response, - content=response.choices[0].message.content, + content=content, tool_calls=_tu, ) if json_mode: try: - json_content = json.loads(response.choices[0].message.content) + json_content = json.loads(content) if content else None except json.JSONDecodeError: LOG.error( "llm.json_decode_error", - content=response.choices[0].message.content[:200], + content=(content or "")[:200], ) json_content = None llm_response.json_content = json_content diff --git a/src/server/core/acontext_core/schema/config.py b/src/server/core/acontext_core/schema/config.py index f5ec77287..51ee2ec6a 100644 --- a/src/server/core/acontext_core/schema/config.py +++ b/src/server/core/acontext_core/schema/config.py @@ -1,6 +1,6 @@ import os import yaml -from pydantic import BaseModel +from pydantic import BaseModel, field_validator from typing import Literal, Mapping, Optional, Any, Type @@ -24,6 +24,15 @@ class CoreConfig(BaseModel): llm_sdk: Literal["openai", "anthropic", "mock"] = "openai" llm_simple_model: str = "gpt-4.1" + llm_strip_tags: list[str] = [] + + @field_validator("llm_strip_tags", mode="before") + @classmethod + def parse_strip_tags(cls, v): + """Accept a comma-separated string (from env var) or a list.""" + if isinstance(v, str): + return [t.strip() for t in v.split(",") if t.strip()] + return v # Core Configuration logging_format: str = "json" diff --git a/src/server/core/tests/llm/test_think_tag_stripping.py b/src/server/core/tests/llm/test_think_tag_stripping.py new file mode 100644 index 000000000..0eae0f241 --- /dev/null +++ b/src/server/core/tests/llm/test_think_tag_stripping.py @@ -0,0 +1,219 @@ +""" +Tests for configurable tag stripping in ``openai_complete``. + +The ``_strip_tags`` helper removes XML-style tag blocks (e.g. +``...``, ``...``) that reasoning models +inject before their final answer. Stripping is **off by default** and +controlled by the ``llm_strip_tags`` config field (env var +``LLM_STRIP_TAGS``). 
+""" + +import json +import pytest +from unittest.mock import AsyncMock, patch, MagicMock +from openai.types.chat import ChatCompletion, ChatCompletionMessage +from openai.types.chat.chat_completion import Choice +from openai.types.completion_usage import CompletionUsage + +from acontext_core.llm.complete.openai_sdk import _strip_tags, openai_complete +from acontext_core.schema.llm import LLMResponse + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_chat_completion(content="Hello", tool_calls=None): + """Build a real ``ChatCompletion`` object for tests.""" + message = ChatCompletionMessage( + role="assistant", + content=content, + tool_calls=tool_calls, + ) + return ChatCompletion( + id="chatcmpl-test", + choices=[Choice(finish_reason="stop", index=0, message=message)], + created=1700000000, + model="test-model", + object="chat.completion", + usage=CompletionUsage( + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + ), + ) + + +def _patch_openai_complete(mock_response, strip_tags=None): + """Return context-manager patches for ``openai_complete``. + + Args: + mock_response: The ``ChatCompletion`` to return from the mocked client. + strip_tags: List of tag names for ``llm_strip_tags`` config. + Defaults to ``[]`` (no stripping). + """ + if strip_tags is None: + strip_tags = [] + + mock_client = AsyncMock() + mock_client.chat.completions.create = AsyncMock(return_value=mock_response) + + mock_cfg = MagicMock() + mock_cfg.llm_strip_tags = strip_tags + mock_cfg.llm_response_timeout = 60 + mock_cfg.llm_openai_completion_kwargs = {} + + p_client = patch( + "acontext_core.llm.complete.openai_sdk.get_openai_async_client_instance", + return_value=mock_client, + ) + p_wide = patch( + "acontext_core.llm.complete.openai_sdk.get_wide_event", + return_value={}, + ) + p_config = patch( + "acontext_core.llm.complete.openai_sdk.DEFAULT_CORE_CONFIG", + mock_cfg, + ) + return p_client, p_wide, p_config + + +# --------------------------------------------------------------------------- +# _strip_tags unit tests +# --------------------------------------------------------------------------- + +class TestStripTags: + """Test stripping arbitrary XML-style tag blocks from model responses.""" + + def test_strip_single_tag(self): + text = "reasoning hereactual response" + assert _strip_tags(text, ["think"]) == "actual response" + + def test_strip_multiline_tag(self): + text = "\nstep 1\nstep 2\n\nfinal answer" + assert _strip_tags(text, ["think"]) == "final answer" + + def test_no_matching_tags(self): + text = "just a normal response" + assert _strip_tags(text, ["think"]) == "just a normal response" + + def test_empty_tag_block(self): + text = "response" + assert _strip_tags(text, ["think"]) == "response" + + def test_tag_in_middle(self): + text = "before thinking after" + assert _strip_tags(text, ["think"]) == "before after" + + def test_multiple_occurrences(self): + text = "firstmiddlesecondend" + assert _strip_tags(text, ["think"]) == "middleend" + + def test_nested_angle_brackets_inside_tag(self): + text = "if a < b and b > c thenanswer" + assert _strip_tags(text, ["think"]) == "answer" + + def test_empty_string(self): + assert _strip_tags("", ["think"]) == "" + + def test_only_tag_block(self): + text = "all reasoning" + assert _strip_tags(text, ["think"]) == "" + + def test_multiple_tag_types(self): + text = "thoughtmiddlereasonend" + assert _strip_tags(text, 
["think", "reasoning"]) == "middleend" + + def test_empty_tags_list_preserves_content(self): + text = "reasoninganswer" + assert _strip_tags(text, []) == "reasoninganswer" + + def test_non_matching_tag_preserved(self): + text = "reasoninganswer" + assert _strip_tags(text, ["reasoning"]) == "reasoninganswer" + + +# --------------------------------------------------------------------------- +# openai_complete integration tests (mocked client) +# --------------------------------------------------------------------------- + +class TestOpenAICompleteTagStripping: + """Verify that ``openai_complete`` strips tags only when configured.""" + + @pytest.mark.asyncio + async def test_no_stripping_by_default(self): + """With default config (empty strip_tags), think tags are preserved.""" + raw = "Let me reason...The answer is 42." + response = _make_chat_completion(content=raw) + p1, p2, p3 = _patch_openai_complete(response, strip_tags=[]) + + with p1, p2, p3: + result = await openai_complete(prompt="test", model="m") + + assert result.content == raw + + @pytest.mark.asyncio + async def test_stripping_when_configured(self): + """When llm_strip_tags=["think"], think tags are removed.""" + response = _make_chat_completion( + content="Let me reason step by step...The answer is 42." + ) + p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"]) + + with p1, p2, p3: + result = await openai_complete(prompt="test", model="m") + + assert isinstance(result, LLMResponse) + assert result.content == "The answer is 42." + + @pytest.mark.asyncio + async def test_no_think_tags_unchanged(self): + """Response without think tags should pass through unchanged.""" + response = _make_chat_completion(content="Hello from the model!") + p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"]) + + with p1, p2, p3: + result = await openai_complete(prompt="Say hello", model="m") + + assert result.content == "Hello from the model!" + + @pytest.mark.asyncio + async def test_json_mode_with_stripping(self): + """JSON mode should parse correctly after stripping think tags.""" + response = _make_chat_completion( + content='reasoning{"key": "value"}' + ) + p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"]) + + with p1, p2, p3: + result = await openai_complete( + prompt="Return JSON", model="m", json_mode=True, + ) + + assert result.json_content == {"key": "value"} + + @pytest.mark.asyncio + async def test_none_content_not_stripped(self): + """None content (e.g. tool-call-only response) should remain None.""" + response = _make_chat_completion(content=None) + p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"]) + + with p1, p2, p3: + result = await openai_complete(prompt="call a tool", model="m") + + assert result.content is None + + @pytest.mark.asyncio + async def test_multiple_tag_types_stripped(self): + """Multiple tag types configured should all be stripped.""" + response = _make_chat_completion( + content="thoughtreasonfinal" + ) + p1, p2, p3 = _patch_openai_complete( + response, strip_tags=["think", "reasoning"] + ) + + with p1, p2, p3: + result = await openai_complete(prompt="test", model="m") + + assert result.content == "final"