#!/usr/bin/env python3
# Source path: fischer-agentkit/.understand-anything/build_kg.py
"""Knowledge Graph Builder for Fischer AgentKit
Scans all Python source files under src/agentkit/ and configs/,
extracts classes, functions, imports, and builds a comprehensive
knowledge graph JSON file.
"""
import ast
import json
import os
import sys
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
# Project root.  This script is stored in <root>/.understand-anything/, so the
# repository root is two levels above this file.  Deriving it from __file__
# (instead of a developer-specific absolute path) keeps the script runnable
# from any checkout location.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# The generated graph is written next to this script.
OUTPUT_PATH = PROJECT_ROOT / ".understand-anything" / "knowledge-graph.json"
# Directories to scan (recursively) for *.py files
SCAN_DIRS = [
    PROJECT_ROOT / "src" / "agentkit",
    PROJECT_ROOT / "configs",
]
# Architecture layer mapping: path component (package / directory name) -> layer.
# Declared layer-first and flattened by the comprehension so that the members
# of each layer can be read at a glance; the resulting dict (including its
# insertion order) is the same as a flat literal.
LAYER_MAP = {
    package: layer
    for layer, packages in (
        ("api", ("server", "cli")),
        ("service", ("core", "orchestrator", "skills", "router")),
        ("data", ("memory", "session", "bus")),
        (
            "utility",
            (
                "llm",
                "mcp",
                "tools",
                "telemetry",
                "prompts",
                "quality",
                "evaluation",
                "evolution",
                "configs",
            ),
        ),
    )
    for package in packages
}
# Chinese summaries for modules.
# Keys look like dotted module paths below the agentkit package (presumably
# "core.base" <-> src/agentkit/core/base.py -- confirm against the tree); the
# entries for the configs directory carry an explicit "configs" prefix.
# Values are one-line Chinese descriptions.  get_file_summary() consults this
# table only when a file has no usable module docstring, so any code that
# looks a file up here has to present its key in this dotted form.
MODULE_SUMMARIES = {
# --- core ---
"core": "核心模块 - 定义Agent基类、通信协议、ReAct引擎、任务分发、注册中心等基础组件",
"core.base": "Agent基类 - 统一Agent生命周期管理包括启动、停止、任务执行、Handoff、进度上报",
"core.protocol": "通信协议定义 - 统一消息格式包括TaskMessage、TaskResult、TaskProgress、HandoffMessage等",
"core.react": "ReAct推理-行动循环引擎 - 实现Think→Act→Observe循环支持工具调用和文本解析模式",
"core.exceptions": "自定义异常体系 - 定义Agent框架所有异常类型",
"core.dispatcher": "任务分发器 - 通过Redis Queue将任务分发给Agent支持回调、重试、进度上报",
"core.registry": "Agent注册中心 - 管理Agent的注册、发现、状态、心跳和负载均衡",
"core.config_driven": "配置驱动Agent - 从YAML/Dict配置自动组装Agent支持llm_generate/tool_call/custom三种模式",
"core.compressor": "上下文压缩器 - 长会话自动压缩历史消息支持LLM摘要和简单截断策略",
"core.trace": "执行轨迹记录器 - 记录ReAct执行过程中的完整轨迹为反思和可观测性提供数据",
"core.shared_workspace": "共享工作空间 - 基于Redis的Agent间共享状态存储支持读写、锁操作",
"core.agent_pool": "Agent实例池 - 运行时管理Agent的创建、获取、删除",
"core.orchestrator": "多Agent协作编排器 - 实现Orchestrator-Worker模式支持任务分解、并行执行、自适应编排",
"core.headroom_compressor": "Headroom AI压缩器 - 基于Headroom AI的上下文压缩实现",
"core.logging": "日志配置 - 统一日志格式和配置",
"core.standalone": "独立运行模式 - 支持Agent脱离框架独立运行",
"core.goal_planner": "目标规划器 - 将复杂目标分解为可执行步骤",
"core.plan_checker": "计划检查器 - 验证执行计划的完整性和可行性",
"core.plan_exec_engine": "计划执行引擎 - 执行分解后的计划步骤",
"core.plan_executor": "计划执行器 - 管理计划执行的完整流程",
"core.plan_schema": "计划Schema - 执行计划的数据结构定义",
"core.reflexion": "Reflexion引擎 - 自反思推理,通过自我评估改进输出",
"core.rewoo": "ReWOO引擎 - 无观察推理,先规划后执行的高效模式",
# --- llm ---
"llm": "LLM网关模块 - 多Provider统一网关支持OpenAI/Anthropic/Gemini/文心/豆包/元宝等",
"llm.gateway": "LLM网关 - 统一多Provider调用接口支持路由、重试、流式输出",
"llm.protocol": "LLM协议定义 - 定义LLMProvider、LLMRequest、LLMResponse等接口",
"llm.config": "LLM配置 - 模型别名、Provider配置管理",
"llm.retry": "LLM重试策略 - 指数退避重试和错误处理",
"llm.providers": "LLM Provider实现 - 各大模型服务商的具体适配器",
"llm.providers.openai": "OpenAI Provider - 支持GPT-4/GPT-3.5等模型",
"llm.providers.anthropic": "Anthropic Provider - 支持Claude系列模型",
"llm.providers.gemini": "Gemini Provider - 支持Google Gemini模型",
"llm.providers.wenxin": "文心一言Provider - 支持百度文心大模型",
"llm.providers.doubao": "豆包Provider - 支持字节豆包大模型",
"llm.providers.yuanbao": "元宝Provider - 支持腾讯元宝大模型",
"llm.providers.tracker": "LLM调用追踪器 - 记录和统计LLM调用",
"llm.providers.usage_store": "LLM用量存储 - Token用量和成本追踪支持InMemory和Redis后端",
"llm.cache": "LLM响应缓存 - 基于语义相似度的LLM响应缓存减少重复调用",
"llm.cache_key": "缓存键生成 - LLM缓存键的计算和归一化",
# --- chat ---
"chat": "聊天路由模块 - CostAwareRouter三层意图路由和语义路由",
"chat.skill_routing": "三层意图路由 - CostAwareRouter正则→启发式→LLM分类逐层升级",
"chat.semantic_router": "语义路由 - 基于向量相似度的意图路由,支持语义匹配",
# --- quality (additional entries; the package-level entry appears further down) ---
"quality.cascade_detector": "级联检测器 - 检测Agent输出中的级联失败模式",
"quality.cascade_state_store": "级联状态存储 - 级联检测状态持久化支持InMemory和Redis后端",
"quality.alignment": "对齐守卫 - 检测和修正Agent输出中的对齐偏差",
# --- tools ---
"tools": "工具模块 - 提供Agent可调用的各类工具",
"tools.base": "工具基类 - 定义Tool接口和标准执行流程",
"tools.registry": "工具注册中心 - 管理工具的注册、发现、获取",
"tools.shell": "Shell工具 - 执行系统命令",
"tools.web_search": "Web搜索工具 - 执行网络搜索",
"tools.web_crawl": "Web爬取工具 - 爬取网页内容",
"tools.memory_tool": "记忆工具 - Agent记忆读写操作",
"tools.ask_human": "人工介入工具 - 请求人类输入",
"tools.schema_tools": "Schema工具 - JSON Schema相关操作",
"tools.function_tool": "函数工具 - 将Python函数包装为Tool",
"tools.agent_tool": "Agent工具 - 将Agent包装为可调用Tool",
"tools.mcp_tool": "MCP工具 - MCP协议工具适配器",
"tools.composition": "工具组合 - 支持工具链式组合",
"tools.baidu_search": "百度搜索工具 - 百度搜索引擎集成",
"tools.headroom_retrieve": "Headroom检索工具 - Headroom AI知识检索",
"tools.computer_use": "计算机使用工具 - 桌面操控工具,支持截图、点击、输入等操作",
"tools.computer_use_session": "计算机使用会话 - 桌面操控会话管理,支持云端和本地(pyautogui)模式",
"tools.computer_use_recorder": "计算机使用录制器 - 记录桌面操控动作序列",
"tools.pty_session": "PTY会话 - 伪终端会话管理",
"tools.terminal_session": "终端会话 - 终端模拟器会话",
"tools.output_parser": "输出解析器 - 解析Agent输出为结构化数据",
"tools.skill_install": "技能安装器 - 动态安装技能包",
# --- memory ---
"memory": "记忆模块 - 多层记忆系统,支持工作记忆、情景记忆、语义记忆",
"memory.base": "记忆基类 - 定义Memory接口",
"memory.working": "工作记忆 - 基于Redis的短期工作记忆",
"memory.episodic": "情景记忆 - 基于向量数据库的长期情景记忆",
"memory.semantic": "语义记忆 - 基于RAG服务的语义知识检索",
"memory.profile": "用户画像 - 用户偏好和历史信息管理",
"memory.retriever": "记忆检索器 - 统一多层记忆检索接口",
"memory.embedder": "嵌入器 - 文本向量化支持OpenAI Embedding",
"memory.models": "记忆数据模型 - Pydantic模型定义",
"memory.rag_loop": "RAG循环 - 检索增强生成的迭代循环",
"memory.query_transformer": "查询转换器 - 优化检索查询",
"memory.relevance_scorer": "相关性评分器 - 评估检索结果相关性",
"memory.contextual_retrieval": "上下文检索 - 基于上下文的检索增强",
"memory.http_rag": "HTTP RAG服务 - 远程RAG API客户端",
# --- skills ---
"skills": "技能模块 - 定义可复用的Agent技能包含意图、工具和质量门控",
"skills.base": "技能基类 - 定义Skill、SkillConfig、IntentConfig等",
"skills.registry": "技能注册中心 - 管理技能的注册、发现、获取",
"skills.loader": "技能加载器 - 从YAML配置加载技能定义",
"skills.pipeline": "技能Pipeline - 技能编排流程",
"skills.skill_md": "Markdown技能 - 从Markdown文档生成技能",
"skills.geo_pipeline": "GEO Pipeline - 地理信息处理Pipeline",
# --- orchestrator ---
"orchestrator": "编排模块 - Pipeline编排引擎支持DAG工作流",
"orchestrator.pipeline_engine": "Pipeline引擎 - 执行DAG定义的工作流",
"orchestrator.pipeline_schema": "Pipeline Schema - Pipeline配置模型定义",
"orchestrator.pipeline_state": "Pipeline状态 - Pipeline执行状态管理",
"orchestrator.pipeline_models": "Pipeline模型 - Pipeline数据模型",
"orchestrator.pipeline_loader": "Pipeline加载器 - 从YAML加载Pipeline定义",
"orchestrator.reflection": "反思模块 - 执行后反思和改进",
"orchestrator.retry": "重试策略 - Pipeline步骤重试机制",
"orchestrator.compensation": "补偿机制 - Pipeline失败时的补偿操作",
"orchestrator.handoff": "Handoff - Agent间任务转交",
"orchestrator.dynamic_pipeline": "动态Pipeline - 运行时动态构建Pipeline",
# --- router / quality / prompts ---
"router": "路由模块 - 意图路由,将用户输入匹配到对应技能",
"router.intent": "意图路由器 - 基于LLM的意图识别和路由",
"quality": "质量模块 - 输出质量门控和标准化",
"quality.gate": "质量门控 - 检查Agent输出是否满足质量要求",
"quality.output": "输出标准化 - 统一Agent输出格式",
"prompts": "Prompt模块 - Prompt模板和渲染",
"prompts.template": "Prompt模板 - 支持变量替换和Section组合",
"prompts.section": "Prompt Section - 定义Prompt的各组成部分",
# --- bus / session ---
"bus": "消息总线模块 - Agent间异步通信",
"bus.protocol": "总线协议 - 定义消息总线接口",
"bus.message": "消息定义 - Agent间通信消息格式",
"bus.memory_bus": "内存消息总线 - 基于进程内队列的消息总线",
"bus.redis_bus": "Redis消息总线 - 基于Redis Pub/Sub的消息总线",
"session": "会话模块 - 会话管理和持久化",
"session.manager": "会话管理器 - 管理对话会话的创建、获取、更新",
"session.store": "会话存储 - 会话数据的持久化存储",
"session.models": "会话模型 - 会话相关的数据模型",
# --- server ---
"server": "服务器模块 - FastAPI HTTP/WebSocket服务",
"server.app": "FastAPI应用 - 创建和配置FastAPI应用实例",
"server.config": "服务器配置 - 服务器运行参数配置",
"server.runner": "服务器运行器 - 启动和管理服务器进程",
"server.middleware": "中间件 - 请求处理中间件",
"server.client": "API客户端 - 服务端API客户端封装",
"server.client_config": "客户端配置 - API客户端配置管理",
"server.task_store": "任务存储 - 服务端任务状态存储",
"server.routes": "路由模块 - HTTP/WebSocket路由定义",
"server.routes.chat": "聊天路由 - 对话API端点",
"server.routes.ws": "WebSocket路由 - 实时通信端点",
"server.routes.tasks": "任务路由 - 任务管理API",
"server.routes.agents": "Agent路由 - Agent管理API",
"server.routes.skills": "技能路由 - 技能管理API含@-mention建议端点",
"server.routes.memory": "记忆路由 - 记忆管理API",
"server.routes.llm": "LLM路由 - LLM配置和调用API",
"server.routes.health": "健康检查路由 - 服务健康状态端点",
"server.routes.metrics": "指标路由 - 运行指标API",
"server.routes.evolution": "进化路由 - Agent进化管理API",
"server.routes.evolution_dashboard": "进化仪表盘路由 - 进化数据可视化API",
"server.routes.kb_management": "知识库管理路由 - 文档上传/搜索/源配置API",
"server.routes.settings": "设置路由 - 系统配置管理API",
"server.routes.terminal": "终端路由 - PTY终端会话API",
"server.routes.workflows": "工作流路由 - Pipeline工作流管理API",
"server.routes.skill_management": "技能管理路由 - 技能CRUD操作API",
"server.routes.portal": "门户路由 - Web GUI入口和静态资源",
# --- cli ---
"cli": "命令行模块 - CLI工具",
"cli.main": "CLI入口 - Typer应用主入口",
"cli.chat": "聊天命令 - 交互式对话命令",
"cli.init": "初始化命令 - 项目初始化",
"cli.onboarding": "引导命令 - 新用户引导流程",
"cli.skill": "技能命令 - 技能管理CLI",
"cli.task": "任务命令 - 任务提交和管理CLI",
"cli.pair": "配对命令 - Agent配对",
"cli.usage": "使用统计命令 - 使用情况统计",
"cli.templates": "模板命令 - Agent模板管理",
# --- mcp / telemetry ---
"mcp": "MCP协议模块 - Model Context Protocol集成",
"mcp.client": "MCP客户端 - 连接MCP服务器",
"mcp.server": "MCP服务器 - 提供MCP服务",
"mcp.manager": "MCP管理器 - 管理MCP连接",
"mcp.transport": "MCP传输层 - MCP通信传输实现",
"telemetry": "遥测模块 - 可观测性支持",
"telemetry.tracing": "分布式追踪 - OpenTelemetry追踪集成",
"telemetry.metrics": "指标收集 - 运行指标收集和导出",
"telemetry.setup": "遥测设置 - 初始化遥测组件",
# --- evolution / evaluation ---
"evolution": "进化模块 - Agent自我进化能力",
"evolution.lifecycle": "进化生命周期 - EvolutionMixin任务后触发进化",
"evolution.reflector": "反思器 - 分析任务执行结果,生成改进建议",
"evolution.llm_reflector": "LLM反思器 - 使用LLM进行深度反思",
"evolution.prompt_optimizer": "Prompt优化器 - 自动优化Agent Prompt",
"evolution.strategy_tuner": "策略调优器 - 调整Agent执行策略",
"evolution.genetic": "遗传算法 - 基于遗传算法的Prompt进化",
"evolution.fitness": "适应度评估 - 评估进化变体的质量",
"evolution.ab_tester": "A/B测试 - 对比测试不同进化变体",
"evolution.evolution_store": "进化存储 - 持久化进化历史",
"evolution.models": "进化模型 - 进化相关数据模型",
"evolution.experience_schema": "经验Schema - 经验数据结构定义",
"evolution.experience_store": "经验存储 - 成功/失败经验持久化",
"evolution.path_optimizer": "路径优化器 - 分析工具调用路径,推荐更优策略",
"evolution.pitfall_detector": "陷阱检测器 - 检测重复错误模式",
"evaluation": "评估模块 - Agent输出质量评估",
"evaluation.ragas_evaluator": "RAGAS评估器 - 使用RAGAS框架评估RAG质量",
# --- org / marketplace (NOTE(review): these packages and "chat" have no
# entry in LAYER_MAP, so get_layer() reports them as "unknown" -- confirm
# whether that is intended) ---
"org": "组织发现模块 - 多Agent组织架构和协作发现",
"org.context": "组织上下文 - 组织级别的共享上下文管理",
"org.discovery": "组织发现 - Agent间能力发现和协作匹配",
"marketplace": "多Agent市场模块 - Agent间的拍卖和财富分配",
"marketplace.auction": "拍卖机制 - Agent间的任务拍卖和竞价",
"marketplace.wealth": "财富管理 - Agent间的价值交换和分配",
# --- configs directory ---
"configs": "配置模块 - Pipeline和技能YAML配置",
"configs.geo_server": "GEO服务器 - 地理信息HTTP服务",
"configs.geo_handlers": "GEO处理器 - 地理信息请求处理",
"configs.geo_tools": "GEO工具 - 地理信息相关工具定义",
}
def get_layer(file_path: str) -> str:
    """Determine the architecture layer for a project-relative file path.

    Args:
        file_path: Path relative to the project root; both "/" and "\\"
            separators are accepted.

    Returns:
        "utility" for anything with a ``configs`` path component, "service"
        for the root package's ``__init__.py`` / ``__main__.py``, otherwise
        the LAYER_MAP value of the first path component that has an entry,
        or "unknown" when no component is mapped.
    """
    parts = file_path.replace("\\", "/").split("/")

    # Everything under configs/ is utility code.
    if "configs" in parts:
        return "utility"

    # Only the root package files (src/agentkit/__init__.py and __main__.py,
    # i.e. at most three path components) get the blanket "service" label.
    # Sub-package __init__.py files have four or more components and must fall
    # through to LAYER_MAP so that, e.g., server/__init__.py lands in the same
    # layer as the rest of server/.
    if parts[-1] in ("__init__.py", "__main__.py") and len(parts) <= 3:
        return "service"

    for part in parts:
        if part in LAYER_MAP:
            return LAYER_MAP[part]
    return "unknown"
def get_module_key(file_path: str) -> str:
    """Convert a project-relative file path into a module key for summary lookup.

    The key is the path with the ``src/agentkit/`` prefix and the Python
    file suffix removed; a package's ``__init__.py`` maps to the package
    itself.  Components stay "/"-separated.

    Files under ``configs/`` keep their ``configs`` component, because the
    summary table stores those entries under a "configs" prefix; stripping
    it would make them impossible to find.

    Args:
        file_path: Path relative to the project root; both "/" and "\\"
            separators are accepted.

    Returns:
        The module key, e.g. "core/base" for "src/agentkit/core/base.py".
    """
    rel = file_path.replace("\\", "/").removeprefix("src/agentkit/")

    # Strip only a trailing suffix.  A blanket str.replace(".py", "") would
    # also mangle any directory or file name that merely contains ".py".
    if rel.endswith("/__init__.py"):
        return rel.removesuffix("/__init__.py")
    return rel.removesuffix(".py")
def get_file_summary(file_path: str, docstring: str = "") -> str:
    """Return a one-line summary for a file.

    Resolution order:
      1. the first non-empty line of ``docstring`` (after stripping), if any;
      2. the MODULE_SUMMARIES entry for the file's module key;
      3. the entry of the nearest ancestor package that has one;
      4. the generic fallback ``"模块 <key>"``.

    Args:
        file_path: Path relative to the project root.
        docstring: Module docstring of the file, or "" when it has none.

    Returns:
        The summary text.
    """
    # If we have a docstring, its first line wins.
    if docstring:
        first_line = docstring.strip().split("\n")[0].strip()
        if first_line:
            return first_line

    key = get_module_key(file_path)

    # MODULE_SUMMARIES is keyed by dotted module paths ("core.base") while the
    # module key is derived from a file path and uses "/" between components.
    # Normalise to dotted components, otherwise sub-module entries can never
    # match and every file degrades to its top-level package summary.
    components = [c for c in key.replace("\\", "/").replace("/", ".").split(".") if c]

    # Longest prefix first: exact module, then parent package, grandparent, ...
    for end in range(len(components), 0, -1):
        candidate = ".".join(components[:end])
        if candidate in MODULE_SUMMARIES:
            return MODULE_SUMMARIES[candidate]
    return f"模块 {key}"
def estimate_complexity(node: ast.AST) -> str:
    """Classify an AST node as "simple", "moderate" or "complex".

    Functions (sync or async) are scored as 1 plus the number of ``if``,
    ``while``, ``for``, ``except`` handlers and ``and`` / ``or`` operators
    found anywhere inside them (nested definitions included).  Classes are
    scored by the number of methods defined directly in the class body.
    Scores up to 3 are "simple", up to 8 "moderate", anything above is
    "complex".  Every other node type is reported as "simple".
    """
    function_kinds = (ast.FunctionDef, ast.AsyncFunctionDef)

    def bucket(score: int) -> str:
        # Shared thresholds for both the function and the class score.
        if score > 8:
            return "complex"
        if score > 3:
            return "moderate"
        return "simple"

    if isinstance(node, function_kinds):
        decision_kinds = (
            ast.If,
            ast.While,
            ast.For,
            ast.ExceptHandler,
            ast.And,
            ast.Or,
        )
        decision_points = sum(
            isinstance(descendant, decision_kinds) for descendant in ast.walk(node)
        )
        return bucket(1 + decision_points)

    if isinstance(node, ast.ClassDef):
        method_count = sum(isinstance(member, function_kinds) for member in node.body)
        return bucket(method_count)

    return "simple"
def extract_class_info(node: ast.ClassDef, file_path: str) -> dict:
    """Extract class information from an AST node.

    Args:
        node: The class definition to describe.
        file_path: Path of the file that contains the class.  Accepted for
            interface compatibility; it is not consulted here.

    Returns:
        A dict with the keys ``name``, ``base_classes`` (list of base names
        as written in the source), ``methods`` (one dict per method defined
        directly in the class body, with ``name``, ``params`` and
        ``is_async``), ``complexity`` and ``docstring`` ("" when absent).
    """
    base_classes = []
    for base in node.bases:
        if isinstance(base, ast.Name):
            base_classes.append(base.id)
        elif isinstance(base, ast.Attribute):
            # Dotted bases such as ``abc.ABC``: record the source text.
            # ast.dump() would leak a raw "Attribute(value=Name(...))" repr
            # into every summary that lists the base classes.
            base_classes.append(ast.unparse(base))
        # Other base expressions (subscripts, calls, ...) are not recorded.

    methods = [
        {
            "name": item.name,
            # Only regular positional parameters are listed; "self" is dropped.
            "params": [arg.arg for arg in item.args.args if arg.arg != "self"],
            "is_async": isinstance(item, ast.AsyncFunctionDef),
        }
        for item in node.body
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]

    return {
        "name": node.name,
        "base_classes": base_classes,
        "methods": methods,
        "complexity": estimate_complexity(node),
        "docstring": ast.get_docstring(node) or "",
    }
def extract_function_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> dict:
    """Extract function information from an AST node.

    Args:
        node: The (sync or async) function definition to describe.

    Returns:
        A dict with the keys ``name``, ``params`` (names of the regular
        positional parameters; ``*args``, keyword-only and positional-only
        parameters are not included), ``return_type`` (the return annotation
        as text, "" when absent), ``is_async`` and ``complexity``.
    """
    annotation = node.returns
    if annotation is None:
        return_type = ""
    elif isinstance(annotation, ast.Name):
        return_type = annotation.id
    elif isinstance(annotation, ast.Constant):
        # Covers ``-> None`` and string (forward-reference) annotations.
        return_type = str(annotation.value)
    else:
        # Composite annotations such as ``list[dict]`` or ``int | None``:
        # emit the source text.  ast.dump() would put an unreadable
        # "Subscript(value=Name(...))" repr into the function summary.
        return_type = ast.unparse(annotation)

    return {
        "name": node.name,
        "params": [arg.arg for arg in node.args.args],
        "return_type": return_type,
        "is_async": isinstance(node, ast.AsyncFunctionDef),
        "complexity": estimate_complexity(node),
    }
def extract_imports(tree: ast.AST, file_path: str) -> list[dict]:
    """Extract project-internal imports from an AST.

    An import is considered internal when its top-level package is exactly
    ``agentkit`` or ``configs``.  Relative imports (``from .x import y``) are
    resolved against the package that contains ``file_path`` before that
    check, so they are reported with their absolute module name.

    Args:
        tree: Parsed module (imports at any nesting depth are collected).
        file_path: Project-relative path of the parsed file, e.g.
            "src/agentkit/core/base.py"; used only to resolve relative imports.

    Returns:
        One dict per imported name, in source order of the statements:
        ``{"from_module": <module or None>, "import_name": <name>}``.
        ``from_module`` is None for plain ``import a.b`` statements, in which
        case ``import_name`` holds the full module path.
    """
    internal_roots = ("agentkit", "configs")

    def is_internal(module: str) -> bool:
        # Compare the first dotted component exactly; a prefix test would
        # also accept unrelated packages such as "agentkit_extras".
        return module.split(".", 1)[0] in internal_roots

    # Package that contains this file, as dotted components.  A leading
    # "src" directory is not part of the import path.  For a package's
    # __init__.py dropping the file name yields the package itself.
    path_parts = file_path.replace("\\", "/").split("/")
    if path_parts and path_parts[0] == "src":
        path_parts = path_parts[1:]
    package_parts = path_parts[:-1]

    imports = []
    for node in ast.walk(tree):
        if isinstance(node, ast.ImportFrom):
            module = node.module
            if node.level:
                # level 1 = current package, level 2 = its parent, ...
                keep = len(package_parts) - (node.level - 1)
                if keep <= 0:
                    continue  # climbs above the top-level package
                anchor = package_parts[:keep]
                module = ".".join(anchor + ([module] if module else []))
            if module and is_internal(module):
                for alias in node.names:
                    imports.append({
                        "from_module": module,
                        "import_name": alias.name,
                    })
        elif isinstance(node, ast.Import):
            for alias in node.names:
                if is_internal(alias.name):
                    imports.append({
                        "from_module": None,
                        "import_name": alias.name,
                    })
    return imports
def module_to_file_path(module: str) -> str:
    """Convert a dotted Python module path to a project-relative file path.

    Only modules whose top-level package is exactly ``agentkit`` (located
    under ``src/agentkit``) or ``configs`` are resolved.  For a dotted
    sub-module the package form (``.../__init__.py``) is tried before the
    plain module form (``... .py``), and a candidate is returned only if the
    file exists below PROJECT_ROOT.

    Args:
        module: Dotted module path, e.g. "agentkit.core.base".

    Returns:
        The "/"-separated path relative to the project root, or "" when the
        module is not project-internal or no matching file exists.  The bare
        package names "agentkit" and "configs" map to their ``__init__.py``
        without an existence check.
    """
    root, *sub_parts = module.split(".")

    # Match the top-level package exactly; a startswith() test would treat
    # e.g. "agentkit_extras" as the agentkit package itself.
    if root == "agentkit":
        prefix = "src/agentkit"
    elif root == "configs":
        prefix = "configs"
    else:
        return ""

    if not sub_parts:
        return f"{prefix}/__init__.py"

    relative = "/".join(sub_parts)
    for candidate in (f"{prefix}/{relative}/__init__.py", f"{prefix}/{relative}.py"):
        if (PROJECT_ROOT / candidate).exists():
            return candidate
    return ""
def scan_file(file_path: Path) -> dict:
    """Scan a single Python file and extract its classes, functions and imports.

    Args:
        file_path: Absolute path of a file located below PROJECT_ROOT.

    Returns:
        A dict with the keys ``classes``, ``functions`` (top-level functions
        only), ``imports``, ``rel_path`` ("/"-separated path relative to
        PROJECT_ROOT) and ``docstring`` (module docstring or "").  When the
        file cannot be decoded or parsed the same keys are returned with
        empty values (plus the legacy ``top_level_functions`` key), and a
        warning is written to stderr.

    Raises:
        ValueError: If ``file_path`` is not located below PROJECT_ROOT.
    """
    # Resolved up front so that the failure result carries it too: the graph
    # builder indexes every scan result by "rel_path".
    rel_path = file_path.relative_to(PROJECT_ROOT).as_posix()

    try:
        source = file_path.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(file_path))
    except (SyntaxError, UnicodeDecodeError, ValueError) as exc:
        # Best effort: an unparsable file still becomes a (content-less) entry
        # instead of aborting the whole run, but the skip is made visible.
        print(f"Warning: could not parse {rel_path}: {exc}", file=sys.stderr)
        return {
            "classes": [],
            "functions": [],
            "imports": [],
            "top_level_functions": [],
            "rel_path": rel_path,
            "docstring": "",
        }

    classes = []
    top_level_functions = []
    # Only direct children of the module: nested definitions are not listed.
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.ClassDef):
            classes.append(extract_class_info(node, rel_path))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            top_level_functions.append(extract_function_info(node))

    return {
        "classes": classes,
        "functions": top_level_functions,
        "imports": extract_imports(tree, rel_path),
        "rel_path": rel_path,
        "docstring": ast.get_docstring(tree) or "",
    }
def _current_git_hash(fallback: str) -> str:
    """Return the HEAD commit hash of the checkout at PROJECT_ROOT.

    Falls back to ``fallback`` when git is not installed, PROJECT_ROOT is not
    a usable git checkout, or the command fails or times out.
    """
    import subprocess  # local import: needed by this helper only

    try:
        completed = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
            check=True,
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        return fallback
    return completed.stdout.strip() or fallback


def build_knowledge_graph():
    """Build the complete knowledge graph.

    Scans every ``*.py`` file below the SCAN_DIRS that exist, then emits:
      1. one node per file,
      2. one node per top-level class (plus "contains" / "extends" edges),
      3. one node per method defined directly in a class,
      4. one node per top-level function,
      5. "imports" edges between files for project-internal imports,
      6. the guided tours.

    Returns:
        A JSON-serialisable dict with the keys ``version``, ``project``,
        ``nodes``, ``edges`` and ``tours``.

    NOTE(review): class and function node ids are built from the bare name
    ("class:<Name>", "func:<name>"), so same-named definitions in different
    files share one id.  The tours refer to ids in this format, so it is
    left unchanged here.
    """
    # Collect all Python files
    py_files = []
    for scan_dir in SCAN_DIRS:
        if scan_dir.exists():
            py_files.extend(scan_dir.rglob("*.py"))
    print(f"Found {len(py_files)} Python files to scan")

    # Scan all files
    file_data = {}
    for py_file in sorted(py_files):
        data = scan_file(py_file)
        # A scan result for an unparsable file may lack "rel_path"; derive it
        # here rather than failing with a KeyError.
        rel_path = data.get("rel_path")
        if rel_path is None:
            rel_path = str(py_file.relative_to(PROJECT_ROOT))
            data["rel_path"] = rel_path
        file_data[rel_path] = data

    # Build nodes and edges
    nodes = []
    edges = []

    def add_edge(source: str, target: str, edge_type: str, label: str) -> None:
        # Every edge gets a short random id.
        edges.append({
            "id": f"edge:{uuid.uuid4().hex[:8]}",
            "source": source,
            "target": target,
            "type": edge_type,
            "label": label,
        })

    # Track all node IDs for edge building
    file_node_ids = {}
    func_node_ids = {}
    # Class ids are registered for the whole project before any edge is
    # created.  Filling this table while iterating would drop every "extends"
    # edge whose base class lives in a file that is scanned later.
    class_node_ids = {
        cls["name"]: f"class:{cls['name']}"
        for data in file_data.values()
        for cls in data["classes"]
    }

    # 1. Create file nodes
    for rel_path, data in file_data.items():
        node_id = f"file:{rel_path}"
        path_parts = rel_path.replace("\\", "/").split("/")
        # Tags are the directory components, minus the common package prefix.
        tags = [
            p
            for p in path_parts
            if p not in ("src", "agentkit", "__init__.py") and not p.endswith(".py")
        ]
        nodes.append({
            "id": node_id,
            "type": "file",
            "name": rel_path.split("/")[-1],
            "filePath": rel_path,
            "layer": get_layer(rel_path),
            "summary": get_file_summary(rel_path, data.get("docstring", "")),
            "tags": tags,
            "complexity": "moderate" if data["classes"] or data["functions"] else "simple",
        })
        file_node_ids[rel_path] = node_id

    # 2. Create class nodes
    for rel_path, data in file_data.items():
        layer = get_layer(rel_path)
        for cls in data["classes"]:
            class_id = class_node_ids[cls["name"]]
            method_names = [m["name"] for m in cls["methods"]]

            # Use docstring for summary if available
            docstring = cls.get("docstring", "")
            if docstring:
                # Take first line of docstring
                summary = docstring.strip().split("\n")[0].strip()
            else:
                summary = f"{cls['name']}"
                if cls["base_classes"]:
                    summary += f",继承自{', '.join(cls['base_classes'])}"
                if method_names:
                    summary += f",包含方法: {', '.join(method_names[:5])}"
                    if len(method_names) > 5:
                        # Separate the total from the last listed method name;
                        # without it the two run together ("...e12个方法").
                        summary += f" 等共{len(method_names)}个方法"

            nodes.append({
                "id": class_id,
                "type": "class",
                "name": cls["name"],
                "filePath": rel_path,
                "layer": layer,
                "summary": summary,
                "tags": [cls["name"]],
                "complexity": cls["complexity"],
            })

            # Edge: file contains class
            add_edge(file_node_ids[rel_path], class_id, "contains", f"定义类 {cls['name']}")

            # Edge: class extends base classes (only bases defined in the project)
            for base in cls["base_classes"]:
                if base in class_node_ids:
                    add_edge(class_id, class_node_ids[base], "extends", f"继承 {base}")

            # 3. Create method nodes
            for method in cls["methods"]:
                qualified = f"{cls['name']}.{method['name']}"
                method_id = f"func:{qualified}"
                async_tag = "异步" if method["is_async"] else ""
                nodes.append({
                    "id": method_id,
                    "type": "function",
                    "name": method["name"],
                    "filePath": rel_path,
                    "layer": layer,
                    "summary": f"{cls['name']}.{method['name']}({', '.join(method['params'])}) {async_tag}方法",
                    "tags": [cls["name"], method["name"]],
                    # Per-method complexity is not collected by the scanner.
                    "complexity": "simple",
                })
                func_node_ids[qualified] = method_id

                # Edge: class contains method
                add_edge(class_id, method_id, "contains", f"方法 {method['name']}")

    # 4. Create top-level function nodes
    for rel_path, data in file_data.items():
        for func in data["functions"]:
            func_id = f"func:{func['name']}"
            async_tag = "异步" if func["is_async"] else ""
            summary = f"{func['name']}({', '.join(func['params'])}) {async_tag}函数"
            if func["return_type"]:
                # Keep the return type visually separate from the word before it.
                summary += f" -> {func['return_type']}"
            nodes.append({
                "id": func_id,
                "type": "function",
                "name": func["name"],
                "filePath": rel_path,
                "layer": get_layer(rel_path),
                "summary": summary,
                "tags": [func["name"]],
                "complexity": func["complexity"],
            })
            func_node_ids[func["name"]] = func_id

            # Edge: file contains function
            add_edge(file_node_ids[rel_path], func_id, "contains", f"定义函数 {func['name']}")

    # 5. Create import edges
    for rel_path, data in file_data.items():
        for imp in data["imports"]:
            # "from X import y" carries the module in from_module; a plain
            # "import X" carries it in import_name (from_module is None).
            # Both forms must yield an edge.
            module = imp["from_module"] or imp["import_name"]
            if not module:
                continue
            target_path = module_to_file_path(module)
            if target_path and target_path in file_node_ids:
                add_edge(
                    file_node_ids[rel_path],
                    file_node_ids[target_path],
                    "imports",
                    f"导入 {imp['import_name']}",
                )

    # 6. Build tours
    tours = build_tours(file_data, file_node_ids, class_node_ids, func_node_ids)

    # Get git commit hash: ask git for the analysed revision; the previously
    # recorded hash is only the fallback for when git cannot be queried.
    git_hash = _current_git_hash("d9d1b16e5911ad958cd8ae38958058bea13f3fcc")

    # Build final JSON
    graph = {
        "version": "1.0.0",
        "project": {
            "name": "Fischer AgentKit",
            "languages": ["python"],
            "frameworks": ["FastAPI", "Pydantic", "SQLAlchemy", "Typer", "Redis"],
            "description": "AI驱动的Agent框架支持ReAct引擎、多LLM网关、Pipeline编排、自适应反思和消息总线",
            "analyzedAt": datetime.now(timezone.utc).isoformat(),
            "gitCommitHash": git_hash,
        },
        "nodes": nodes,
        "edges": edges,
        "tours": tours,
    }
    return graph
def build_tours(file_data, file_node_ids, class_node_ids, func_node_ids):
    """Build the guided learning tours.

    The tours are a fixed catalogue: each one has an id, a display name, a
    description and an ordered list of steps, where a step names a node id
    and the reason for visiting it.  The four arguments are accepted for
    interface compatibility but are not consulted, so the step ids are not
    checked against the nodes that actually exist.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``description`` and
        ``steps`` (each step being ``{"nodeId": ..., "why": ...}``).
    """
    # (tour id, name, description, ((node id, why), ...))
    catalogue = (
        # Tour 1: Entry Points
        (
            "tour:entry-points",
            "入口点导览",
            "从项目入口开始了解如何启动和使用AgentKit",
            (
                ("file:src/agentkit/__main__.py", "Python模块入口python -m agentkit"),
                ("file:src/agentkit/__init__.py", "包入口导出核心公共API"),
                ("file:src/agentkit/cli/main.py", "CLI主入口Typer应用定义"),
                ("file:src/agentkit/server/app.py", "HTTP服务入口FastAPI应用创建"),
            ),
        ),
        # Tour 2: Core Agent Lifecycle
        (
            "tour:agent-lifecycle",
            "Agent生命周期导览",
            "深入理解Agent从创建到执行任务的完整生命周期",
            (
                ("class:BaseAgent", "Agent基类定义标准生命周期和可插拔能力"),
                ("func:BaseAgent.start", "Agent启动流程连接Redis→注册→心跳→监听"),
                ("func:BaseAgent.execute", "任务执行框架方法on_task_start→handle_task→quality_gate→on_task_complete"),
                ("func:BaseAgent.handle_task", "抽象方法,子类实现业务逻辑"),
                ("class:ConfigDrivenAgent", "配置驱动Agent从YAML自动组装"),
                ("func:ConfigDrivenAgent.handle_task", "根据execution_mode路由到react/direct/custom模式"),
                ("class:AgentConfig", "Agent配置模型支持YAML/Dict构建"),
            ),
        ),
        # Tour 3: ReAct Engine
        (
            "tour:react-engine",
            "ReAct引擎导览",
            "理解ReAct推理-行动循环的核心实现",
            (
                ("class:ReActEngine", "ReAct引擎核心Think→Act→Observe循环"),
                ("func:ReActEngine.execute", "执行ReAct循环支持超时和取消"),
                ("func:ReActEngine.execute_stream", "流式执行逐步yield事件"),
                ("func:ReActEngine._execute_tool", "工具调用执行,处理成功和失败"),
                ("func:ReActEngine._parse_text_tool_calls", "文本解析模式支持Action和代码块格式"),
                ("class:ReActStep", "单步记录数据结构"),
                ("class:ReActResult", "ReAct执行结果数据结构"),
                ("class:ReActEvent", "流式执行事件数据结构"),
            ),
        ),
        # Tour 4: LLM Gateway
        (
            "tour:llm-gateway",
            "LLM网关导览",
            "了解多Provider统一网关的设计和实现",
            (
                ("class:LLMGateway", "LLM网关核心统一多Provider调用接口"),
                ("file:src/agentkit/llm/protocol.py", "LLM协议定义LLMProvider/LLMRequest/LLMResponse"),
                ("file:src/agentkit/llm/config.py", "模型别名和Provider配置"),
                ("file:src/agentkit/llm/providers/openai.py", "OpenAI Provider实现"),
                ("file:src/agentkit/llm/providers/anthropic.py", "Anthropic Provider实现"),
                ("file:src/agentkit/llm/retry.py", "LLM重试策略"),
            ),
        ),
        # Tour 5: Memory System
        (
            "tour:memory-system",
            "记忆系统导览",
            "理解多层记忆系统的架构和实现",
            (
                ("file:src/agentkit/memory/base.py", "记忆基类接口定义"),
                ("file:src/agentkit/memory/retriever.py", "统一记忆检索器,整合工作/情景/语义记忆"),
                ("file:src/agentkit/memory/working.py", "工作记忆 - 基于Redis的短期记忆"),
                ("file:src/agentkit/memory/episodic.py", "情景记忆 - 基于向量的长期记忆"),
                ("file:src/agentkit/memory/semantic.py", "语义记忆 - RAG服务集成"),
                ("file:src/agentkit/memory/embedder.py", "文本向量化嵌入器"),
            ),
        ),
        # Tour 6: Orchestration
        (
            "tour:orchestration",
            "编排系统导览",
            "了解多Agent协作编排和Pipeline引擎",
            (
                ("class:Orchestrator", "多Agent协作编排器Orchestrator-Worker模式"),
                ("func:Orchestrator.execute", "编排执行:分解→执行→汇总"),
                ("func:Orchestrator.execute_adaptive", "自适应编排:执行→评估→再分解循环"),
                ("file:src/agentkit/orchestrator/pipeline_engine.py", "Pipeline引擎执行DAG工作流"),
                ("file:src/agentkit/orchestrator/pipeline_schema.py", "Pipeline配置模型"),
                ("file:src/agentkit/orchestrator/reflection.py", "执行后反思模块"),
            ),
        ),
        # Tour 7: Skills & Router
        (
            "tour:skills-router",
            "技能与路由导览",
            "了解技能定义、注册和意图路由机制",
            (
                ("file:src/agentkit/skills/base.py", "技能基类和配置定义"),
                ("class:SkillRegistry", "技能注册中心"),
                ("file:src/agentkit/skills/loader.py", "从YAML加载技能定义"),
                ("class:IntentRouter", "意图路由器,匹配用户输入到技能"),
                ("file:src/agentkit/router/intent.py", "意图路由实现"),
            ),
        ),
        # Tour 8: Evolution
        (
            "tour:evolution",
            "进化系统导览",
            "了解Agent自我进化的机制和实现",
            (
                ("file:src/agentkit/evolution/lifecycle.py", "进化生命周期Mixin"),
                ("file:src/agentkit/evolution/reflector.py", "反思器 - 分析结果生成改进建议"),
                ("file:src/agentkit/evolution/prompt_optimizer.py", "Prompt自动优化"),
                ("file:src/agentkit/evolution/genetic.py", "遗传算法进化"),
                ("file:src/agentkit/evolution/ab_tester.py", "A/B测试对比"),
            ),
        ),
        # Tour 9: Infrastructure
        (
            "tour:infrastructure",
            "基础设施导览",
            "了解消息总线、会话管理、遥测等基础设施",
            (
                ("file:src/agentkit/bus/protocol.py", "消息总线协议接口"),
                ("file:src/agentkit/bus/redis_bus.py", "Redis Pub/Sub消息总线"),
                ("file:src/agentkit/bus/memory_bus.py", "进程内消息总线"),
                ("file:src/agentkit/session/manager.py", "会话管理器"),
                ("file:src/agentkit/telemetry/tracing.py", "OpenTelemetry追踪集成"),
                ("file:src/agentkit/telemetry/metrics.py", "运行指标收集"),
            ),
        ),
    )

    return [
        {
            "id": tour_id,
            "name": title,
            "description": blurb,
            "steps": [{"nodeId": node_id, "why": reason} for node_id, reason in stops],
        }
        for tour_id, title, blurb, stops in catalogue
    ]
def main():
    """Build the knowledge graph, write it to OUTPUT_PATH and print statistics.

    The graph is serialised as UTF-8 JSON (non-ASCII characters kept as-is,
    two-space indentation).  Afterwards the node / edge / tour totals and the
    node counts per layer and per node type are printed, the latter two
    sorted by key.
    """
    print("Building knowledge graph for Fischer AgentKit...")
    graph = build_knowledge_graph()

    # Make sure the target directory exists before writing.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with OUTPUT_PATH.open("w", encoding="utf-8") as out:
        json.dump(graph, out, ensure_ascii=False, indent=2)

    print(f"Knowledge graph written to {OUTPUT_PATH}")
    for title, section in (("Nodes", "nodes"), ("Edges", "edges"), ("Tours", "tours")):
        print(f" {title}: {len(graph[section])}")

    def tally(field):
        # Count the nodes per distinct value of `field`, sorted by value.
        totals = {}
        for entry in graph["nodes"]:
            value = entry[field]
            totals[value] = totals.get(value, 0) + 1
        return sorted(totals.items())

    # Layer statistics
    layer_rows = tally("layer")
    print("\nLayer distribution:")
    for layer, count in layer_rows:
        print(f" {layer}: {count} nodes")

    # Node type statistics
    type_rows = tally("type")
    print("\nNode type distribution:")
    for node_type, count in type_rows:
        print(f" {node_type}: {count} nodes")


if __name__ == "__main__":
    main()