2 changes: 1 addition & 1 deletion README.md
@@ -128,7 +128,7 @@ mkdir acontext_server && cd acontext_server
acontext server up
```

- > Make sure your LLM has the ability to [call tools](https://platform.openai.com/docs/guides/function-calling). By default, Acontext will use `gpt-4.1`.
+ > Make sure your LLM has the ability to [call tools](https://platform.openai.com/docs/guides/function-calling). By default, Acontext will use `gpt-4.1`. You can also use other LLM providers by setting `LLM_SDK` to `anthropic` in your `.env` file, or use any OpenAI-compatible provider (e.g., MiniMax, DeepSeek) by setting `LLM_BASE_URL`.

`acontext server up` will create/use `.env` and `config.yaml` for Acontext, and create a `db` folder to persist data.

19 changes: 19 additions & 0 deletions docs/content/docs/(guides)/settings/core.mdx
@@ -29,6 +29,10 @@ Default model identifier for LLM operations. Examples: `gpt-4`, `gpt-3.5-turbo`,
Timeout in seconds for LLM API responses. Increase for longer operations.
</ParamField>

<ParamField path="LLM_STRIP_TAGS" type="string" default="">
Comma-separated list of XML-style tag names to strip from LLM responses. Many reasoning models wrap chain-of-thought in tags like `<think>...</think>`. Set to `think` to strip those blocks, or `think,reasoning` for multiple tag types. Default is empty (no stripping — original content preserved).
</ParamField>
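
For illustration, the transformation applied per configured tag is equivalent to the following Python sketch (the response text is made up; the pattern mirrors the `_strip_tags` helper added in this PR):

```python
import re

raw = "<think>Let me work through this...</think>The answer is 42."
# One non-greedy substitution per configured tag, then a final strip:
clean = re.sub(r"<think>[\s\S]*?</think>\s*", "", raw).strip()
print(clean)  # The answer is 42.
```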

### Embedding Configuration

<ParamField path="BLOCK_EMBEDDING_PROVIDER" type="string" default="openai">
@@ -89,6 +93,21 @@ BLOCK_EMBEDDING_PROVIDER=openai
BLOCK_EMBEDDING_API_KEY=sk-your-openai-key-for-embeddings
```

```bash title="MiniMax Setup"
# MiniMax is OpenAI-compatible — use LLM_SDK=openai with a custom base URL
LLM_API_KEY=your-minimax-api-key
LLM_SDK=openai
LLM_BASE_URL=https://api.minimax.io/v1
LLM_SIMPLE_MODEL=MiniMax-M2.7

# Strip reasoning tags from MiniMax responses (optional)
LLM_STRIP_TAGS=think

# Keep OpenAI for embeddings (recommended)
BLOCK_EMBEDDING_PROVIDER=openai
BLOCK_EMBEDDING_API_KEY=sk-your-openai-key-for-embeddings
```

```bash title="Custom Endpoints"
# Custom LLM endpoint (e.g., Azure OpenAI)
LLM_API_KEY=your-azure-key
28 changes: 25 additions & 3 deletions src/server/core/acontext_core/llm/complete/openai_sdk.py
@@ -1,4 +1,5 @@
import json
import re
from typing import Optional
from .clients import get_openai_async_client_instance
from openai.types.chat import ChatCompletion
@@ -9,6 +10,23 @@
from ...telemetry.log import get_wide_event


def _strip_tags(text: str, tags: list[str]) -> str:
    """Strip named XML-style tag blocks from model responses.

    Many reasoning models (DeepSeek, QwQ, MiniMax, etc.) wrap their internal
    chain-of-thought in tags like ``<think>...</think>``. This helper removes
    the specified tag blocks so that downstream consumers receive only the
    final answer.

    Args:
        text: The raw response text.
        tags: Tag names to strip, e.g. ``["think", "reasoning"]``.
    """
    for tag in tags:
        text = re.sub(rf"<{tag}>[\s\S]*?</{tag}>\s*", "", text)
    return text.strip()


def convert_openai_tool_to_llm_tool(tool_body: ChatCompletionMessageToolCall) -> dict:
    return {
        "id": tool_body.id,
@@ -90,20 +108,24 @@ async def openai_complete(
        else None
    )

    content = response.choices[0].message.content
    if content and DEFAULT_CORE_CONFIG.llm_strip_tags:
        content = _strip_tags(content, DEFAULT_CORE_CONFIG.llm_strip_tags)

    llm_response = LLMResponse(
        role=response.choices[0].message.role,
        raw_response=response,
-        content=response.choices[0].message.content,
+        content=content,
        tool_calls=_tu,
    )

    if json_mode:
        try:
-            json_content = json.loads(response.choices[0].message.content)
+            json_content = json.loads(content) if content else None
        except json.JSONDecodeError:
            LOG.error(
                "llm.json_decode_error",
-                content=response.choices[0].message.content[:200],
+                content=(content or "")[:200],
            )
            json_content = None
    llm_response.json_content = json_content
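
For reference, a minimal sketch of how the new helper behaves, using illustrative strings (stripping only happens when tag names are configured):

```python
from acontext_core.llm.complete.openai_sdk import _strip_tags

raw = "<think>hmm...</think>The answer is 42."
assert _strip_tags(raw, ["think"]) == "The answer is 42."
assert _strip_tags(raw, []) == raw  # empty config list: content is preserved
```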
11 changes: 10 additions & 1 deletion src/server/core/acontext_core/schema/config.py
@@ -1,6 +1,6 @@
import os
import yaml
- from pydantic import BaseModel
+ from pydantic import BaseModel, field_validator
from typing import Literal, Mapping, Optional, Any, Type


@@ -24,6 +24,15 @@ class CoreConfig(BaseModel):
    llm_sdk: Literal["openai", "anthropic", "mock"] = "openai"

    llm_simple_model: str = "gpt-4.1"
    llm_strip_tags: list[str] = []

    @field_validator("llm_strip_tags", mode="before")
    @classmethod
    def parse_strip_tags(cls, v):
        """Accept a comma-separated string (from env var) or a list."""
        if isinstance(v, str):
            return [t.strip() for t in v.split(",") if t.strip()]
        return v

    # Core Configuration
    logging_format: str = "json"
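
And a quick sketch of the validator's coercion, assuming the remaining `CoreConfig` fields all have usable defaults:

```python
from acontext_core.schema.config import CoreConfig

# Env-style comma-separated string becomes a trimmed list:
assert CoreConfig(llm_strip_tags="think, reasoning").llm_strip_tags == ["think", "reasoning"]

# Lists pass through unchanged; the default disables stripping:
assert CoreConfig(llm_strip_tags=["think"]).llm_strip_tags == ["think"]
assert CoreConfig().llm_strip_tags == []
```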
219 changes: 219 additions & 0 deletions src/server/core/tests/llm/test_think_tag_stripping.py
@@ -0,0 +1,219 @@
"""
Tests for configurable tag stripping in ``openai_complete``.

The ``_strip_tags`` helper removes XML-style tag blocks (e.g.
``<think>...</think>``, ``<reasoning>...</reasoning>``) that reasoning models
inject before their final answer. Stripping is **off by default** and
controlled by the ``llm_strip_tags`` config field (env var
``LLM_STRIP_TAGS``).
"""

import json
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
from openai.types.chat import ChatCompletion, ChatCompletionMessage
from openai.types.chat.chat_completion import Choice
from openai.types.completion_usage import CompletionUsage

from acontext_core.llm.complete.openai_sdk import _strip_tags, openai_complete
from acontext_core.schema.llm import LLMResponse


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _make_chat_completion(content="Hello", tool_calls=None):
    """Build a real ``ChatCompletion`` object for tests."""
    message = ChatCompletionMessage(
        role="assistant",
        content=content,
        tool_calls=tool_calls,
    )
    return ChatCompletion(
        id="chatcmpl-test",
        choices=[Choice(finish_reason="stop", index=0, message=message)],
        created=1700000000,
        model="test-model",
        object="chat.completion",
        usage=CompletionUsage(
            prompt_tokens=10,
            completion_tokens=20,
            total_tokens=30,
        ),
    )


def _patch_openai_complete(mock_response, strip_tags=None):
    """Return context-manager patches for ``openai_complete``.

    Args:
        mock_response: The ``ChatCompletion`` to return from the mocked client.
        strip_tags: List of tag names for ``llm_strip_tags`` config.
            Defaults to ``[]`` (no stripping).
    """
    if strip_tags is None:
        strip_tags = []

    mock_client = AsyncMock()
    mock_client.chat.completions.create = AsyncMock(return_value=mock_response)

    mock_cfg = MagicMock()
    mock_cfg.llm_strip_tags = strip_tags
    mock_cfg.llm_response_timeout = 60
    mock_cfg.llm_openai_completion_kwargs = {}

    p_client = patch(
        "acontext_core.llm.complete.openai_sdk.get_openai_async_client_instance",
        return_value=mock_client,
    )
    p_wide = patch(
        "acontext_core.llm.complete.openai_sdk.get_wide_event",
        return_value={},
    )
    p_config = patch(
        "acontext_core.llm.complete.openai_sdk.DEFAULT_CORE_CONFIG",
        mock_cfg,
    )
    return p_client, p_wide, p_config


# ---------------------------------------------------------------------------
# _strip_tags unit tests
# ---------------------------------------------------------------------------

class TestStripTags:
    """Test stripping arbitrary XML-style tag blocks from model responses."""

    def test_strip_single_tag(self):
        text = "<think>reasoning here</think>actual response"
        assert _strip_tags(text, ["think"]) == "actual response"

    def test_strip_multiline_tag(self):
        text = "<think>\nstep 1\nstep 2\n</think>\nfinal answer"
        assert _strip_tags(text, ["think"]) == "final answer"

    def test_no_matching_tags(self):
        text = "just a normal response"
        assert _strip_tags(text, ["think"]) == "just a normal response"

    def test_empty_tag_block(self):
        text = "<think></think>response"
        assert _strip_tags(text, ["think"]) == "response"

    def test_tag_in_middle(self):
        text = "before <think>thinking</think> after"
        assert _strip_tags(text, ["think"]) == "before after"

    def test_multiple_occurrences(self):
        text = "<think>first</think>middle<think>second</think>end"
        assert _strip_tags(text, ["think"]) == "middleend"

    def test_nested_angle_brackets_inside_tag(self):
        text = "<think>if a < b and b > c then</think>answer"
        assert _strip_tags(text, ["think"]) == "answer"

    def test_empty_string(self):
        assert _strip_tags("", ["think"]) == ""

    def test_only_tag_block(self):
        text = "<think>all reasoning</think>"
        assert _strip_tags(text, ["think"]) == ""

    def test_multiple_tag_types(self):
        text = "<think>thought</think>middle<reasoning>reason</reasoning>end"
        assert _strip_tags(text, ["think", "reasoning"]) == "middleend"

    def test_empty_tags_list_preserves_content(self):
        text = "<think>reasoning</think>answer"
        assert _strip_tags(text, []) == "<think>reasoning</think>answer"

    def test_non_matching_tag_preserved(self):
        text = "<think>reasoning</think>answer"
        assert _strip_tags(text, ["reasoning"]) == "<think>reasoning</think>answer"


# ---------------------------------------------------------------------------
# openai_complete integration tests (mocked client)
# ---------------------------------------------------------------------------

class TestOpenAICompleteTagStripping:
    """Verify that ``openai_complete`` strips tags only when configured."""

    @pytest.mark.asyncio
    async def test_no_stripping_by_default(self):
        """With default config (empty strip_tags), think tags are preserved."""
        raw = "<think>Let me reason...</think>The answer is 42."
        response = _make_chat_completion(content=raw)
        p1, p2, p3 = _patch_openai_complete(response, strip_tags=[])

        with p1, p2, p3:
            result = await openai_complete(prompt="test", model="m")

        assert result.content == raw

    @pytest.mark.asyncio
    async def test_stripping_when_configured(self):
        """When llm_strip_tags=["think"], think tags are removed."""
        response = _make_chat_completion(
            content="<think>Let me reason step by step...</think>The answer is 42."
        )
        p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"])

        with p1, p2, p3:
            result = await openai_complete(prompt="test", model="m")

        assert isinstance(result, LLMResponse)
        assert result.content == "The answer is 42."

    @pytest.mark.asyncio
    async def test_no_think_tags_unchanged(self):
        """Response without think tags should pass through unchanged."""
        response = _make_chat_completion(content="Hello from the model!")
        p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"])

        with p1, p2, p3:
            result = await openai_complete(prompt="Say hello", model="m")

        assert result.content == "Hello from the model!"

    @pytest.mark.asyncio
    async def test_json_mode_with_stripping(self):
        """JSON mode should parse correctly after stripping think tags."""
        response = _make_chat_completion(
            content='<think>reasoning</think>{"key": "value"}'
        )
        p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"])

        with p1, p2, p3:
            result = await openai_complete(
                prompt="Return JSON", model="m", json_mode=True,
            )

        assert result.json_content == {"key": "value"}

    @pytest.mark.asyncio
    async def test_none_content_not_stripped(self):
        """None content (e.g. tool-call-only response) should remain None."""
        response = _make_chat_completion(content=None)
        p1, p2, p3 = _patch_openai_complete(response, strip_tags=["think"])

        with p1, p2, p3:
            result = await openai_complete(prompt="call a tool", model="m")

        assert result.content is None

    @pytest.mark.asyncio
    async def test_multiple_tag_types_stripped(self):
        """Multiple tag types configured should all be stripped."""
        response = _make_chat_completion(
            content="<think>thought</think><reasoning>reason</reasoning>final"
        )
        p1, p2, p3 = _patch_openai_complete(
            response, strip_tags=["think", "reasoning"]
        )

        with p1, p2, p3:
            result = await openai_complete(prompt="test", model="m")

        assert result.content == "final"
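
These tests use the `@pytest.mark.asyncio` marker, so they assume the `pytest-asyncio` plugin is installed; with it, the suite can be run directly, e.g. `pytest src/server/core/tests/llm/test_think_tag_stripping.py`.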