diff --git a/README.md b/README.md
index 06b90381a..4ff9e72db 100644
--- a/README.md
+++ b/README.md
@@ -128,7 +128,7 @@ mkdir acontext_server && cd acontext_server
acontext server up
```
-> Make sure your LLM has the ability to [call tools](https://platform.openai.com/docs/guides/function-calling). By default, Acontext will use `gpt-4.1`.
+> Make sure your LLM has the ability to [call tools](https://platform.openai.com/docs/guides/function-calling). By default, Acontext will use `gpt-4.1`. You can also use other LLM providers by setting `LLM_SDK` to `anthropic` in your `.env` file, or use any OpenAI-compatible provider (e.g., MiniMax, DeepSeek) by setting `LLM_BASE_URL`.
`acontext server up` will create/use `.env` and `config.yaml` for Acontext, and create a `db` folder to persist data.
diff --git a/docs/content/docs/(guides)/settings/core.mdx b/docs/content/docs/(guides)/settings/core.mdx
index d5f37f3b5..753403817 100644
--- a/docs/content/docs/(guides)/settings/core.mdx
+++ b/docs/content/docs/(guides)/settings/core.mdx
@@ -29,6 +29,10 @@ Default model identifier for LLM operations. Examples: `gpt-4`, `gpt-3.5-turbo`,
Timeout in seconds for LLM API responses. Increase for longer operations.
+
+Comma-separated list of XML-style tag names to strip from LLM responses (set via the `LLM_STRIP_TAGS` environment variable). Many reasoning models wrap chain-of-thought in tags like `<think>...</think>`. Set it to `think` to strip those blocks, or `think,reasoning` to strip multiple tag types. Default is empty: no stripping, so original content is preserved.
+
+
### Embedding Configuration
@@ -89,6 +93,21 @@ BLOCK_EMBEDDING_PROVIDER=openai
BLOCK_EMBEDDING_API_KEY=sk-your-openai-key-for-embeddings
```
+```bash title="MiniMax Setup"
+# MiniMax is OpenAI-compatible — use LLM_SDK=openai with a custom base URL
+LLM_API_KEY=your-minimax-api-key
+LLM_SDK=openai
+LLM_BASE_URL=https://api.minimax.io/v1
+LLM_SIMPLE_MODEL=MiniMax-M2.7
+
+# Strip reasoning tags from MiniMax responses (optional)
+LLM_STRIP_TAGS=think
+
+# Keep OpenAI for embeddings (recommended)
+BLOCK_EMBEDDING_PROVIDER=openai
+BLOCK_EMBEDDING_API_KEY=sk-your-openai-key-for-embeddings
+```
+
```bash title="Custom Endpoints"
# Custom LLM endpoint (e.g., Azure OpenAI)
LLM_API_KEY=your-azure-key
diff --git a/src/server/core/acontext_core/llm/complete/openai_sdk.py b/src/server/core/acontext_core/llm/complete/openai_sdk.py
index fdefff8aa..6a4bef7c7 100644
--- a/src/server/core/acontext_core/llm/complete/openai_sdk.py
+++ b/src/server/core/acontext_core/llm/complete/openai_sdk.py
@@ -1,4 +1,5 @@
import json
+import re
from typing import Optional
from .clients import get_openai_async_client_instance
from openai.types.chat import ChatCompletion
@@ -9,6 +10,23 @@
from ...telemetry.log import get_wide_event
+def _strip_tags(text: str, tags: list[str]) -> str:
+ """Strip named XML-style tag blocks from model responses.
+
+ Many reasoning models (DeepSeek, QwQ, MiniMax, etc.) wrap their internal
+    chain-of-thought in tags like ``<think>...</think>``. This helper removes
+ the specified tag blocks so that downstream consumers receive only the
+ final answer.
+
+ Args:
+ text: The raw response text.
+ tags: Tag names to strip, e.g. ``["think", "reasoning"]``.
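+
+    Example (illustrative):
+        >>> _strip_tags("<think>hmm</think>The answer.", ["think"])
+        'The answer.'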
+ """
+ for tag in tags:
+        text = re.sub(rf"<{tag}>[\s\S]*?</{tag}>\s*", "", text)
+ return text.strip()
+
+
def convert_openai_tool_to_llm_tool(tool_body: ChatCompletionMessageToolCall) -> dict:
return {
"id": tool_body.id,
@@ -90,20 +108,24 @@ async def openai_complete(
else None
)
+ content = response.choices[0].message.content
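+        # Tag stripping is opt-in: it only runs when llm_strip_tags is
+        # non-empty, so the default path passes content through verbatim.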
+ if content and DEFAULT_CORE_CONFIG.llm_strip_tags:
+ content = _strip_tags(content, DEFAULT_CORE_CONFIG.llm_strip_tags)
+
llm_response = LLMResponse(
role=response.choices[0].message.role,
raw_response=response,
- content=response.choices[0].message.content,
+ content=content,
tool_calls=_tu,
)
if json_mode:
try:
- json_content = json.loads(response.choices[0].message.content)
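+            # content can be None (tool-call-only responses) or emptied by
+            # tag stripping; json.loads(None) would raise TypeError, which
+            # the JSONDecodeError handler below would not catch.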
+ json_content = json.loads(content) if content else None
except json.JSONDecodeError:
LOG.error(
"llm.json_decode_error",
- content=response.choices[0].message.content[:200],
+ content=(content or "")[:200],
)
json_content = None
llm_response.json_content = json_content
diff --git a/src/server/core/acontext_core/schema/config.py b/src/server/core/acontext_core/schema/config.py
index f5ec77287..51ee2ec6a 100644
--- a/src/server/core/acontext_core/schema/config.py
+++ b/src/server/core/acontext_core/schema/config.py
@@ -1,6 +1,6 @@
import os
import yaml
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
from typing import Literal, Mapping, Optional, Any, Type
@@ -24,6 +24,15 @@ class CoreConfig(BaseModel):
llm_sdk: Literal["openai", "anthropic", "mock"] = "openai"
llm_simple_model: str = "gpt-4.1"
+ llm_strip_tags: list[str] = []
+
+ @field_validator("llm_strip_tags", mode="before")
+ @classmethod
+ def parse_strip_tags(cls, v):
+ """Accept a comma-separated string (from env var) or a list."""
+ if isinstance(v, str):
+ return [t.strip() for t in v.split(",") if t.strip()]
+ return v
# Core Configuration
logging_format: str = "json"
diff --git a/src/server/core/tests/llm/test_think_tag_stripping.py b/src/server/core/tests/llm/test_think_tag_stripping.py
new file mode 100644
index 000000000..0eae0f241
--- /dev/null
+++ b/src/server/core/tests/llm/test_think_tag_stripping.py
@@ -0,0 +1,219 @@
+"""
+Tests for configurable tag stripping in ``openai_complete``.
+
+The ``_strip_tags`` helper removes XML-style tag blocks (e.g.
+``<think>...</think>``, ``<reasoning>...</reasoning>``) that reasoning models
+inject before their final answer. Stripping is **off by default** and
+controlled by the ``llm_strip_tags`` config field (env var
+``LLM_STRIP_TAGS``).
+"""
+
+import pytest
+from unittest.mock import AsyncMock, patch, MagicMock
+from openai.types.chat import ChatCompletion, ChatCompletionMessage
+from openai.types.chat.chat_completion import Choice
+from openai.types.completion_usage import CompletionUsage
+
+from acontext_core.llm.complete.openai_sdk import _strip_tags, openai_complete
+from acontext_core.schema.llm import LLMResponse
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_chat_completion(content="Hello", tool_calls=None):
+ """Build a real ``ChatCompletion`` object for tests."""
+ message = ChatCompletionMessage(
+ role="assistant",
+ content=content,
+ tool_calls=tool_calls,
+ )
+ return ChatCompletion(
+ id="chatcmpl-test",
+ choices=[Choice(finish_reason="stop", index=0, message=message)],
+ created=1700000000,
+ model="test-model",
+ object="chat.completion",
+ usage=CompletionUsage(
+ prompt_tokens=10,
+ completion_tokens=20,
+ total_tokens=30,
+ ),
+ )
+
+
+def _patch_openai_complete(mock_response, strip_tags=None):
+ """Return context-manager patches for ``openai_complete``.
+
+ Args:
+ mock_response: The ``ChatCompletion`` to return from the mocked client.
+ strip_tags: List of tag names for ``llm_strip_tags`` config.
+ Defaults to ``[]`` (no stripping).
+ """
+ if strip_tags is None:
+ strip_tags = []
+
+ mock_client = AsyncMock()
+ mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
+
+ mock_cfg = MagicMock()
+ mock_cfg.llm_strip_tags = strip_tags
+ mock_cfg.llm_response_timeout = 60
+ mock_cfg.llm_openai_completion_kwargs = {}
+
+ p_client = patch(
+ "acontext_core.llm.complete.openai_sdk.get_openai_async_client_instance",
+ return_value=mock_client,
+ )
+ p_wide = patch(
+ "acontext_core.llm.complete.openai_sdk.get_wide_event",
+ return_value={},
+ )
+ p_config = patch(
+ "acontext_core.llm.complete.openai_sdk.DEFAULT_CORE_CONFIG",
+ mock_cfg,
+ )
+ return p_client, p_wide, p_config
+
+
+# ---------------------------------------------------------------------------
+# _strip_tags unit tests
+# ---------------------------------------------------------------------------
+
+class TestStripTags:
+ """Test stripping arbitrary XML-style tag blocks from model responses."""
+
+ def test_strip_single_tag(self):
+ text = "reasoning hereactual response"
+ assert _strip_tags(text, ["think"]) == "actual response"
+
+ def test_strip_multiline_tag(self):
+ text = "\nstep 1\nstep 2\n\nfinal answer"
+ assert _strip_tags(text, ["think"]) == "final answer"
+
+ def test_no_matching_tags(self):
+ text = "just a normal response"
+ assert _strip_tags(text, ["think"]) == "just a normal response"
+
+ def test_empty_tag_block(self):
+ text = "response"
+ assert _strip_tags(text, ["think"]) == "response"
+
+ def test_tag_in_middle(self):
+ text = "before thinking after"
+ assert _strip_tags(text, ["think"]) == "before after"
+
+ def test_multiple_occurrences(self):
+ text = "firstmiddlesecondend"
+ assert _strip_tags(text, ["think"]) == "middleend"
+
+ def test_nested_angle_brackets_inside_tag(self):
+ text = "if a < b and b > c thenanswer"
+ assert _strip_tags(text, ["think"]) == "answer"
+
+ def test_empty_string(self):
+ assert _strip_tags("", ["think"]) == ""
+
+ def test_only_tag_block(self):
+ text = "all reasoning"
+ assert _strip_tags(text, ["think"]) == ""
+
+ def test_multiple_tag_types(self):
+ text = "thoughtmiddlereasonend"
+ assert _strip_tags(text, ["think", "reasoning"]) == "middleend"
+
+ def test_empty_tags_list_preserves_content(self):
+ text = "reasoninganswer"
+ assert _strip_tags(text, []) == "reasoninganswer"
+
+ def test_non_matching_tag_preserved(self):
+ text = "reasoninganswer"
+ assert _strip_tags(text, ["reasoning"]) == "reasoninganswer"
+
+
+# ---------------------------------------------------------------------------
+# openai_complete integration tests (mocked client)
+# ---------------------------------------------------------------------------
+
+class TestOpenAICompleteTagStripping:
+ """Verify that ``openai_complete`` strips tags only when configured."""
+
+ @pytest.mark.asyncio
+ async def test_no_stripping_by_default(self):
+ """With default config (empty strip_tags), think tags are preserved."""
+ raw = "Let me reason...The answer is 42."
+ response = _make_chat_completion(content=raw)
+ p1, p2, p3 = _patch_openai_complete(response, strip_tags=[])
+
+ with p1, p2, p3:
+ result = await openai_complete(prompt="test", model="m")
+
+ assert result.content == raw
+
+ @pytest.mark.asyncio
+ async def test_stripping_when_configured(self):
+ """When llm_strip_tags=["think"], think tags are removed."""
+ response = _make_chat_completion(
+ content="Let me reason step by step...The answer is 42."
+ )
+ p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"])
+
+ with p1, p2, p3:
+ result = await openai_complete(prompt="test", model="m")
+
+ assert isinstance(result, LLMResponse)
+ assert result.content == "The answer is 42."
+
+ @pytest.mark.asyncio
+ async def test_no_think_tags_unchanged(self):
+ """Response without think tags should pass through unchanged."""
+ response = _make_chat_completion(content="Hello from the model!")
+ p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"])
+
+ with p1, p2, p3:
+ result = await openai_complete(prompt="Say hello", model="m")
+
+ assert result.content == "Hello from the model!"
+
+ @pytest.mark.asyncio
+ async def test_json_mode_with_stripping(self):
+ """JSON mode should parse correctly after stripping think tags."""
+ response = _make_chat_completion(
+            content='<think>reasoning</think>{"key": "value"}'
+ )
+ p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"])
+
+ with p1, p2, p3:
+ result = await openai_complete(
+ prompt="Return JSON", model="m", json_mode=True,
+ )
+
+ assert result.json_content == {"key": "value"}
+
+ @pytest.mark.asyncio
+ async def test_none_content_not_stripped(self):
+ """None content (e.g. tool-call-only response) should remain None."""
+ response = _make_chat_completion(content=None)
+ p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"])
+
+ with p1, p2, p3:
+ result = await openai_complete(prompt="call a tool", model="m")
+
+ assert result.content is None
+
+ @pytest.mark.asyncio
+ async def test_multiple_tag_types_stripped(self):
+ """Multiple tag types configured should all be stripped."""
+ response = _make_chat_completion(
+ content="thoughtreasonfinal"
+ )
+ p1, p2, p3 = _patch_openai_complete(
+ response, strip_tags=["think", "reasoning"]
+ )
+
+ with p1, p2, p3:
+ result = await openai_complete(prompt="test", model="m")
+
+ assert result.content == "final"