feat(compressor): CJK-aware token estimation + linear compress flow #21

Merged
fischer merged 3 commits from feat/context-compressor-cjk into main 2026-07-03 09:40:29 +08:00
2 changed files with 60 additions and 6 deletions
Showing only changes of commit 3a05c4d1e6 - Show all commits

View File

@ -108,11 +108,7 @@ class ContextCompressor:
def estimate_tokens(self, messages: list[dict]) -> int:
"""Estimate total tokens in message list (CJK 1:1, ASCII 4:1)"""
total = 0
for msg in messages:
content = msg.get("content", "")
total += estimate_text_tokens(str(content))
return total
return sum(estimate_text_tokens(str(m.get("content", ""))) for m in messages)
async def compress(self, messages: list[dict]) -> list[dict]:
"""Compress messages if they exceed token budget.
@ -205,7 +201,10 @@ class ContextCompressor:
# Pre-truncate if conversation_text exceeds safe token threshold
estimated_tokens = estimate_text_tokens(conversation_text)
if estimated_tokens > max_input_tokens:
max_chars = max_input_tokens * 4
# CJK-aware char limit: max_input_tokens chars is exact for CJK (1:1),
# conservative for ASCII (4:1, truncates to 1/4 budget but safe).
# Review fix #1: old `* 4` allowed 4x token budget for CJK text.
max_chars = max_input_tokens
conversation_text = conversation_text[:max_chars] + "\n...[truncated]"
prompt = (

View File

@ -126,6 +126,29 @@ class TestEstimateTextTokensCJK:
old_estimate = len("你好" * 50) // 4 # old: len // 4
assert new_estimate >= old_estimate * 4
async def test_summarize_cjk_pre_truncation(self):
"""Review fix #2: _summarize() CJK 文本预截断正确触发
构造 CJK 文本使 estimate_text_tokens > max_input_tokens
len(text) < max_input_tokens * 4验证旧 bug* 4 假设允许 4x 超预算
"""
gateway = make_mock_gateway("Summary result")
compressor = ContextCompressor(llm_gateway=gateway)
# 4000 CJK chars = 4000 tokens (1:1), > max_input_tokens=3200
# But len=4000 < 3200 * 4 = 12800, so old `* 4` limit wouldn't truncate
cjk_content = "" * 4000
messages = [{"role": "user", "content": cjk_content}]
await compressor._summarize(messages, max_input_tokens=3200)
# Verify LLM was called with truncated text (not full 4000 chars)
call_messages = gateway.chat.call_args.kwargs["messages"]
prompt_content = call_messages[0]["content"]
# The conversation_text in the prompt should be truncated to <= 3200 chars
# (plus truncation marker), not the full 4000 chars
assert "...[truncated]" in prompt_content
# Verify the CJK content was actually shortened
assert prompt_content.count("") < 4000
class TestNoCompressionWhenUnderBudget:
"""Token 预算内不压缩"""
@ -436,6 +459,8 @@ class TestCompressLinearFlow:
# Truncate should have cut message content
total_chars = sum(len(str(m.get("content", ""))) for m in result)
assert total_chars < sum(len(str(m.get("content", ""))) for m in messages)
# Review fix #7: verify truncate actually triggered via truncation marker
assert any("...[truncated]" in str(m.get("content", "")) for m in result)
class TestCompressionLogging:
@ -654,3 +679,33 @@ class TestReActEngineWithCompressor:
)
assert result.output == "Answer"
async def test_should_compress_cjk_fallback_path(self):
"""Review fix #5: _should_compress() CJK fallback for compressors
without should_compress() method (e.g. HeadroomCompressor)
Verifies R7: react.py fallback uses estimate_text_tokens, so CJK
long conversations correctly trigger compression.
"""
from agentkit.core.react import ReActEngine
gateway = MagicMock()
engine = ReActEngine(llm_gateway=gateway)
# Mock compressor WITHOUT should_compress() method
# (simulates HeadroomCompressor which doesn't implement it)
mock_compressor = MagicMock(spec=["is_available", "compress", "compress_tool_result"])
mock_compressor.is_available.return_value = True
# CJK long conversation: 10000 CJK chars = 10000 tokens > 8000 threshold
cjk_conversation = [
{"role": "user", "content": "" * 5000},
{"role": "assistant", "content": "" * 5000},
]
result = engine._should_compress(cjk_conversation, mock_compressor)
assert result is True
# ASCII short conversation should not trigger
ascii_short = [{"role": "user", "content": "Hello"}]
result = engine._should_compress(ascii_short, mock_compressor)
assert result is False