863 lines
39 KiB
Python
863 lines
39 KiB
Python
#!/usr/bin/env python3
|
||
"""Knowledge Graph Builder for Fischer AgentKit
|
||
|
||
Scans all Python source files under src/agentkit/ and configs/,
|
||
extracts classes, functions, imports, and builds a comprehensive
|
||
knowledge graph JSON file.
|
||
"""
|
||
|
||
import ast
|
||
import json
|
||
import os
|
||
import sys
|
||
import uuid
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
# Project root.
# Defaults to the original author's checkout location; set the
# AGENTKIT_PROJECT_ROOT environment variable to run the builder against a
# checkout that lives somewhere else.
PROJECT_ROOT = Path(
    os.environ.get(
        "AGENTKIT_PROJECT_ROOT",
        "/Users/Chiguyong/Code/Fischer/fischer-agentkit",
    )
)
# Destination file of the generated knowledge graph (JSON).
OUTPUT_PATH = PROJECT_ROOT / ".understand-anything" / "knowledge-graph.json"

# Directories to scan (searched recursively for *.py files)
SCAN_DIRS = [
    PROJECT_ROOT / "src" / "agentkit",
    PROJECT_ROOT / "configs",
]
|
||
|
||
# Architecture layer mapping: package directory name -> layer label.
# The table is grouped by layer; LAYER_MAP is the flattened lookup form.
_PACKAGES_BY_LAYER = (
    ("api", ("server", "cli")),
    ("service", ("core", "orchestrator", "skills", "router")),
    ("data", ("memory", "session", "bus")),
    (
        "utility",
        (
            "llm",
            "mcp",
            "tools",
            "telemetry",
            "prompts",
            "quality",
            "evaluation",
            "evolution",
            "configs",
        ),
    ),
)
LAYER_MAP = {
    package: layer
    for layer, packages in _PACKAGES_BY_LAYER
    for package in packages
}
|
||
|
||
# Chinese summaries for modules.
# Keys are dotted module paths: entries for code under src/agentkit/ omit the
# "agentkit" prefix (e.g. "core.base"), while entries for the configs
# directory keep a leading "configs" component (e.g. "configs.geo_server").
# A bare package name (e.g. "core") describes the package as a whole.
# Values are one-line Chinese descriptions that are emitted verbatim into the
# generated graph, so they are runtime data and must not be translated.
# Looked up by get_file_summary() when a file has no usable module docstring.
MODULE_SUMMARIES = {
    # --- core ---
    "core": "核心模块 - 定义Agent基类、通信协议、ReAct引擎、任务分发、注册中心等基础组件",
    "core.base": "Agent基类 - 统一Agent生命周期管理,包括启动、停止、任务执行、Handoff、进度上报",
    "core.protocol": "通信协议定义 - 统一消息格式,包括TaskMessage、TaskResult、TaskProgress、HandoffMessage等",
    "core.react": "ReAct推理-行动循环引擎 - 实现Think→Act→Observe循环,支持工具调用和文本解析模式",
    "core.exceptions": "自定义异常体系 - 定义Agent框架所有异常类型",
    "core.dispatcher": "任务分发器 - 通过Redis Queue将任务分发给Agent,支持回调、重试、进度上报",
    "core.registry": "Agent注册中心 - 管理Agent的注册、发现、状态、心跳和负载均衡",
    "core.config_driven": "配置驱动Agent - 从YAML/Dict配置自动组装Agent,支持llm_generate/tool_call/custom三种模式",
    "core.compressor": "上下文压缩器 - 长会话自动压缩历史消息,支持LLM摘要和简单截断策略",
    "core.trace": "执行轨迹记录器 - 记录ReAct执行过程中的完整轨迹,为反思和可观测性提供数据",
    "core.shared_workspace": "共享工作空间 - 基于Redis的Agent间共享状态存储,支持读写、锁操作",
    "core.agent_pool": "Agent实例池 - 运行时管理Agent的创建、获取、删除",
    "core.orchestrator": "多Agent协作编排器 - 实现Orchestrator-Worker模式,支持任务分解、并行执行、自适应编排",
    "core.headroom_compressor": "Headroom AI压缩器 - 基于Headroom AI的上下文压缩实现",
    "core.logging": "日志配置 - 统一日志格式和配置",
    "core.standalone": "独立运行模式 - 支持Agent脱离框架独立运行",
    "core.goal_planner": "目标规划器 - 将复杂目标分解为可执行步骤",
    "core.plan_checker": "计划检查器 - 验证执行计划的完整性和可行性",
    "core.plan_exec_engine": "计划执行引擎 - 执行分解后的计划步骤",
    "core.plan_executor": "计划执行器 - 管理计划执行的完整流程",
    "core.plan_schema": "计划Schema - 执行计划的数据结构定义",
    "core.reflexion": "Reflexion引擎 - 自反思推理,通过自我评估改进输出",
    "core.rewoo": "ReWOO引擎 - 无观察推理,先规划后执行的高效模式",

    # --- llm ---
    "llm": "LLM网关模块 - 多Provider统一网关,支持OpenAI/Anthropic/Gemini/文心/豆包/元宝等",
    "llm.gateway": "LLM网关 - 统一多Provider调用接口,支持路由、重试、流式输出",
    "llm.protocol": "LLM协议定义 - 定义LLMProvider、LLMRequest、LLMResponse等接口",
    "llm.config": "LLM配置 - 模型别名、Provider配置管理",
    "llm.retry": "LLM重试策略 - 指数退避重试和错误处理",
    "llm.providers": "LLM Provider实现 - 各大模型服务商的具体适配器",
    "llm.providers.openai": "OpenAI Provider - 支持GPT-4/GPT-3.5等模型",
    "llm.providers.anthropic": "Anthropic Provider - 支持Claude系列模型",
    "llm.providers.gemini": "Gemini Provider - 支持Google Gemini模型",
    "llm.providers.wenxin": "文心一言Provider - 支持百度文心大模型",
    "llm.providers.doubao": "豆包Provider - 支持字节豆包大模型",
    "llm.providers.yuanbao": "元宝Provider - 支持腾讯元宝大模型",
    "llm.providers.tracker": "LLM调用追踪器 - 记录和统计LLM调用",
    "llm.providers.usage_store": "LLM用量存储 - Token用量和成本追踪,支持InMemory和Redis后端",
    "llm.cache": "LLM响应缓存 - 基于语义相似度的LLM响应缓存,减少重复调用",
    "llm.cache_key": "缓存键生成 - LLM缓存键的计算和归一化",

    # --- chat ---
    "chat": "聊天路由模块 - CostAwareRouter三层意图路由和语义路由",
    "chat.skill_routing": "三层意图路由 - CostAwareRouter,正则→启发式→LLM分类逐层升级",
    "chat.semantic_router": "语义路由 - 基于向量相似度的意图路由,支持语义匹配",

    # --- quality (additional quality.* entries appear further below) ---
    "quality.cascade_detector": "级联检测器 - 检测Agent输出中的级联失败模式",
    "quality.cascade_state_store": "级联状态存储 - 级联检测状态持久化,支持InMemory和Redis后端",
    "quality.alignment": "对齐守卫 - 检测和修正Agent输出中的对齐偏差",

    # --- tools ---
    "tools": "工具模块 - 提供Agent可调用的各类工具",
    "tools.base": "工具基类 - 定义Tool接口和标准执行流程",
    "tools.registry": "工具注册中心 - 管理工具的注册、发现、获取",
    "tools.shell": "Shell工具 - 执行系统命令",
    "tools.web_search": "Web搜索工具 - 执行网络搜索",
    "tools.web_crawl": "Web爬取工具 - 爬取网页内容",
    "tools.memory_tool": "记忆工具 - Agent记忆读写操作",
    "tools.ask_human": "人工介入工具 - 请求人类输入",
    "tools.schema_tools": "Schema工具 - JSON Schema相关操作",
    "tools.function_tool": "函数工具 - 将Python函数包装为Tool",
    "tools.agent_tool": "Agent工具 - 将Agent包装为可调用Tool",
    "tools.mcp_tool": "MCP工具 - MCP协议工具适配器",
    "tools.composition": "工具组合 - 支持工具链式组合",
    "tools.baidu_search": "百度搜索工具 - 百度搜索引擎集成",
    "tools.headroom_retrieve": "Headroom检索工具 - Headroom AI知识检索",
    "tools.computer_use": "计算机使用工具 - 桌面操控工具,支持截图、点击、输入等操作",
    "tools.computer_use_session": "计算机使用会话 - 桌面操控会话管理,支持云端和本地(pyautogui)模式",
    "tools.computer_use_recorder": "计算机使用录制器 - 记录桌面操控动作序列",
    "tools.pty_session": "PTY会话 - 伪终端会话管理",
    "tools.terminal_session": "终端会话 - 终端模拟器会话",
    "tools.output_parser": "输出解析器 - 解析Agent输出为结构化数据",
    "tools.skill_install": "技能安装器 - 动态安装技能包",

    # --- memory ---
    "memory": "记忆模块 - 多层记忆系统,支持工作记忆、情景记忆、语义记忆",
    "memory.base": "记忆基类 - 定义Memory接口",
    "memory.working": "工作记忆 - 基于Redis的短期工作记忆",
    "memory.episodic": "情景记忆 - 基于向量数据库的长期情景记忆",
    "memory.semantic": "语义记忆 - 基于RAG服务的语义知识检索",
    "memory.profile": "用户画像 - 用户偏好和历史信息管理",
    "memory.retriever": "记忆检索器 - 统一多层记忆检索接口",
    "memory.embedder": "嵌入器 - 文本向量化,支持OpenAI Embedding",
    "memory.models": "记忆数据模型 - Pydantic模型定义",
    "memory.rag_loop": "RAG循环 - 检索增强生成的迭代循环",
    "memory.query_transformer": "查询转换器 - 优化检索查询",
    "memory.relevance_scorer": "相关性评分器 - 评估检索结果相关性",
    "memory.contextual_retrieval": "上下文检索 - 基于上下文的检索增强",
    "memory.http_rag": "HTTP RAG服务 - 远程RAG API客户端",

    # --- skills ---
    "skills": "技能模块 - 定义可复用的Agent技能,包含意图、工具和质量门控",
    "skills.base": "技能基类 - 定义Skill、SkillConfig、IntentConfig等",
    "skills.registry": "技能注册中心 - 管理技能的注册、发现、获取",
    "skills.loader": "技能加载器 - 从YAML配置加载技能定义",
    "skills.pipeline": "技能Pipeline - 技能编排流程",
    "skills.skill_md": "Markdown技能 - 从Markdown文档生成技能",
    "skills.geo_pipeline": "GEO Pipeline - 地理信息处理Pipeline",

    # --- orchestrator ---
    "orchestrator": "编排模块 - Pipeline编排引擎,支持DAG工作流",
    "orchestrator.pipeline_engine": "Pipeline引擎 - 执行DAG定义的工作流",
    "orchestrator.pipeline_schema": "Pipeline Schema - Pipeline配置模型定义",
    "orchestrator.pipeline_state": "Pipeline状态 - Pipeline执行状态管理",
    "orchestrator.pipeline_models": "Pipeline模型 - Pipeline数据模型",
    "orchestrator.pipeline_loader": "Pipeline加载器 - 从YAML加载Pipeline定义",
    "orchestrator.reflection": "反思模块 - 执行后反思和改进",
    "orchestrator.retry": "重试策略 - Pipeline步骤重试机制",
    "orchestrator.compensation": "补偿机制 - Pipeline失败时的补偿操作",
    "orchestrator.handoff": "Handoff - Agent间任务转交",
    "orchestrator.dynamic_pipeline": "动态Pipeline - 运行时动态构建Pipeline",

    # --- router ---
    "router": "路由模块 - 意图路由,将用户输入匹配到对应技能",
    "router.intent": "意图路由器 - 基于LLM的意图识别和路由",

    # --- quality (package-level entry and remaining modules) ---
    "quality": "质量模块 - 输出质量门控和标准化",
    "quality.gate": "质量门控 - 检查Agent输出是否满足质量要求",
    "quality.output": "输出标准化 - 统一Agent输出格式",

    # --- prompts ---
    "prompts": "Prompt模块 - Prompt模板和渲染",
    "prompts.template": "Prompt模板 - 支持变量替换和Section组合",
    "prompts.section": "Prompt Section - 定义Prompt的各组成部分",

    # --- bus ---
    "bus": "消息总线模块 - Agent间异步通信",
    "bus.protocol": "总线协议 - 定义消息总线接口",
    "bus.message": "消息定义 - Agent间通信消息格式",
    "bus.memory_bus": "内存消息总线 - 基于进程内队列的消息总线",
    "bus.redis_bus": "Redis消息总线 - 基于Redis Pub/Sub的消息总线",

    # --- session ---
    "session": "会话模块 - 会话管理和持久化",
    "session.manager": "会话管理器 - 管理对话会话的创建、获取、更新",
    "session.store": "会话存储 - 会话数据的持久化存储",
    "session.models": "会话模型 - 会话相关的数据模型",

    # --- server ---
    "server": "服务器模块 - FastAPI HTTP/WebSocket服务",
    "server.app": "FastAPI应用 - 创建和配置FastAPI应用实例",
    "server.config": "服务器配置 - 服务器运行参数配置",
    "server.runner": "服务器运行器 - 启动和管理服务器进程",
    "server.middleware": "中间件 - 请求处理中间件",
    "server.client": "API客户端 - 服务端API客户端封装",
    "server.client_config": "客户端配置 - API客户端配置管理",
    "server.task_store": "任务存储 - 服务端任务状态存储",
    "server.routes": "路由模块 - HTTP/WebSocket路由定义",
    "server.routes.chat": "聊天路由 - 对话API端点",
    "server.routes.ws": "WebSocket路由 - 实时通信端点",
    "server.routes.tasks": "任务路由 - 任务管理API",
    "server.routes.agents": "Agent路由 - Agent管理API",
    "server.routes.skills": "技能路由 - 技能管理API,含@-mention建议端点",
    "server.routes.memory": "记忆路由 - 记忆管理API",
    "server.routes.llm": "LLM路由 - LLM配置和调用API",
    "server.routes.health": "健康检查路由 - 服务健康状态端点",
    "server.routes.metrics": "指标路由 - 运行指标API",
    "server.routes.evolution": "进化路由 - Agent进化管理API",
    "server.routes.evolution_dashboard": "进化仪表盘路由 - 进化数据可视化API",
    "server.routes.kb_management": "知识库管理路由 - 文档上传/搜索/源配置API",
    "server.routes.settings": "设置路由 - 系统配置管理API",
    "server.routes.terminal": "终端路由 - PTY终端会话API",
    "server.routes.workflows": "工作流路由 - Pipeline工作流管理API",
    "server.routes.skill_management": "技能管理路由 - 技能CRUD操作API",
    "server.routes.portal": "门户路由 - Web GUI入口和静态资源",

    # --- cli ---
    "cli": "命令行模块 - CLI工具",
    "cli.main": "CLI入口 - Typer应用主入口",
    "cli.chat": "聊天命令 - 交互式对话命令",
    "cli.init": "初始化命令 - 项目初始化",
    "cli.onboarding": "引导命令 - 新用户引导流程",
    "cli.skill": "技能命令 - 技能管理CLI",
    "cli.task": "任务命令 - 任务提交和管理CLI",
    "cli.pair": "配对命令 - Agent配对",
    "cli.usage": "使用统计命令 - 使用情况统计",
    "cli.templates": "模板命令 - Agent模板管理",

    # --- mcp ---
    "mcp": "MCP协议模块 - Model Context Protocol集成",
    "mcp.client": "MCP客户端 - 连接MCP服务器",
    "mcp.server": "MCP服务器 - 提供MCP服务",
    "mcp.manager": "MCP管理器 - 管理MCP连接",
    "mcp.transport": "MCP传输层 - MCP通信传输实现",

    # --- telemetry ---
    "telemetry": "遥测模块 - 可观测性支持",
    "telemetry.tracing": "分布式追踪 - OpenTelemetry追踪集成",
    "telemetry.metrics": "指标收集 - 运行指标收集和导出",
    "telemetry.setup": "遥测设置 - 初始化遥测组件",

    # --- evolution ---
    "evolution": "进化模块 - Agent自我进化能力",
    "evolution.lifecycle": "进化生命周期 - EvolutionMixin,任务后触发进化",
    "evolution.reflector": "反思器 - 分析任务执行结果,生成改进建议",
    "evolution.llm_reflector": "LLM反思器 - 使用LLM进行深度反思",
    "evolution.prompt_optimizer": "Prompt优化器 - 自动优化Agent Prompt",
    "evolution.strategy_tuner": "策略调优器 - 调整Agent执行策略",
    "evolution.genetic": "遗传算法 - 基于遗传算法的Prompt进化",
    "evolution.fitness": "适应度评估 - 评估进化变体的质量",
    "evolution.ab_tester": "A/B测试 - 对比测试不同进化变体",
    "evolution.evolution_store": "进化存储 - 持久化进化历史",
    "evolution.models": "进化模型 - 进化相关数据模型",
    "evolution.experience_schema": "经验Schema - 经验数据结构定义",
    "evolution.experience_store": "经验存储 - 成功/失败经验持久化",
    "evolution.path_optimizer": "路径优化器 - 分析工具调用路径,推荐更优策略",
    "evolution.pitfall_detector": "陷阱检测器 - 检测重复错误模式",

    # --- evaluation ---
    "evaluation": "评估模块 - Agent输出质量评估",
    "evaluation.ragas_evaluator": "RAGAS评估器 - 使用RAGAS框架评估RAG质量",

    # --- org ---
    "org": "组织发现模块 - 多Agent组织架构和协作发现",
    "org.context": "组织上下文 - 组织级别的共享上下文管理",
    "org.discovery": "组织发现 - Agent间能力发现和协作匹配",

    # --- marketplace ---
    "marketplace": "多Agent市场模块 - Agent间的拍卖和财富分配",
    "marketplace.auction": "拍卖机制 - Agent间的任务拍卖和竞价",
    "marketplace.wealth": "财富管理 - Agent间的价值交换和分配",

    # --- configs (keys keep the "configs." prefix) ---
    "configs": "配置模块 - Pipeline和技能YAML配置",
    "configs.geo_server": "GEO服务器 - 地理信息HTTP服务",
    "configs.geo_handlers": "GEO处理器 - 地理信息请求处理",
    "configs.geo_tools": "GEO工具 - 地理信息相关工具定义",
}
|
||
|
||
|
||
def get_layer(file_path: str) -> str:
    """Determine the architecture layer of a file from its path.

    Args:
        file_path: Project-relative path; both "/" and "\\" separators are
            accepted.

    Returns:
        The layer label: "utility" for anything with a ``configs`` path
        component, otherwise the LAYER_MAP entry of the first path component
        that has one, otherwise "service" for shallow ``__init__.py`` /
        ``__main__.py`` files, otherwise "unknown".
    """
    parts = file_path.replace("\\", "/").split("/")

    # Anything under a configs/ directory is utility code.
    if "configs" in parts:
        return "utility"

    # Look up the package mapping BEFORE the package-entry fallback below.
    # Previously the fallback ran first, so e.g. src/agentkit/server/__init__.py
    # was labelled "service" although every other file of that package is "api".
    for part in parts:
        if part in LAYER_MAP:
            return LAYER_MAP[part]

    # Package entry files with no mapped package (src/agentkit/__init__.py,
    # src/agentkit/__main__.py, unmapped sub-packages) are treated as service.
    if parts[-1] in ("__init__.py", "__main__.py") and len(parts) <= 4:
        return "service"

    return "unknown"
|
||
|
||
|
||
def get_module_key(file_path: str) -> str:
    """Convert a project-relative file path into a module key.

    The leading ``src/agentkit/`` or ``configs/`` prefix is dropped, a
    trailing ``/__init__.py`` collapses to the package path, and a trailing
    ``.py`` is removed. Components stay separated by "/".

    Args:
        file_path: Project-relative path; "\\" separators are normalised to
            "/" so Windows-style paths produce the same key.

    Returns:
        The key, e.g. ``"core/base"`` for ``"src/agentkit/core/base.py"``.
    """
    rel = file_path.replace("\\", "/")

    for prefix in ("src/agentkit/", "configs/"):
        if rel.startswith(prefix):
            rel = rel[len(prefix):]
            break

    # Strip only a *trailing* suffix; a blanket str.replace would also eat
    # ".py" occurring in the middle of a directory or file name.
    if rel.endswith("/__init__.py"):
        return rel[: -len("/__init__.py")]
    return rel.removesuffix(".py")
|
||
|
||
|
||
def get_file_summary(file_path: str, docstring: str = "") -> str:
    """Return a one-line summary for a file.

    Resolution order:
      1. the first line of ``docstring`` when it is non-empty;
      2. the MODULE_SUMMARIES entry for the file's module;
      3. the MODULE_SUMMARIES entry of the nearest parent package;
      4. a generic ``"模块 <key>"`` placeholder.

    Args:
        file_path: Project-relative path of the file.
        docstring: Module docstring of the file, if any.

    Returns:
        The summary text.
    """
    # A docstring, when present, wins over the static table.
    if docstring:
        first_line = docstring.strip().split("\n")[0].strip()
        if first_line:
            return first_line

    normalized = file_path.replace("\\", "/")
    key = get_module_key(normalized)

    # MODULE_SUMMARIES is keyed by dotted paths ("core.base"), while the
    # module key uses "/" separators, so compare in dotted form. Entries for
    # the configs directory keep a "configs." prefix that the module key drops.
    parts = key.split("/")
    if normalized.startswith("configs/"):
        parts = ["configs", *parts]

    # Try the exact module first, then each enclosing package, longest first.
    for depth in range(len(parts), 0, -1):
        candidate = ".".join(parts[:depth])
        if candidate in MODULE_SUMMARIES:
            return MODULE_SUMMARIES[candidate]

    return f"模块 {key}"
|
||
|
||
|
||
def estimate_complexity(node: ast.AST) -> str:
    """Classify an AST node as "simple", "moderate" or "complex".

    Functions (sync or async) are scored as 1 plus the number of
    ``if`` / ``while`` / ``for`` / ``except`` nodes and ``and`` / ``or``
    operators found anywhere inside them. Classes are scored by the number
    of methods defined directly in their body. In both cases a score up to
    3 is "simple", up to 8 is "moderate" and anything above is "complex".
    Every other node type is "simple".
    """

    def _bucket(score: int) -> str:
        # Shared thresholds for both function and class scores.
        if score > 8:
            return "complex"
        if score > 3:
            return "moderate"
        return "simple"

    callable_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    if isinstance(node, ast.ClassDef):
        method_count = sum(
            isinstance(member, callable_types) for member in node.body
        )
        return _bucket(method_count)

    if isinstance(node, callable_types):
        decision_types = (
            ast.If,
            ast.While,
            ast.For,
            ast.ExceptHandler,
            ast.And,
            ast.Or,
        )
        decisions = sum(
            isinstance(child, decision_types) for child in ast.walk(node)
        )
        return _bucket(1 + decisions)

    return "simple"
|
||
|
||
|
||
def extract_class_info(node: ast.ClassDef, file_path: str) -> dict:
    """Extract class information from a class-definition AST node.

    Args:
        node: The class definition to describe.
        file_path: Path of the file containing the class (currently unused;
            kept for call compatibility).

    Returns:
        A dict with the keys:
          * ``name`` - the class name;
          * ``base_classes`` - source text of every base-class expression
            (``"Base"``, ``"abc.ABC"``, ``"Generic[T]"``, ...);
          * ``methods`` - one dict per method defined directly in the class
            body with ``name``, ``params`` (positional parameter names
            without ``self``) and ``is_async``;
          * ``complexity`` - result of estimate_complexity() for the class;
          * ``docstring`` - the class docstring, or "" when absent.
    """
    # ast.unparse yields readable source ("abc.ABC") for dotted or
    # subscripted bases; ast.dump would leak the raw AST repr into the
    # generated summaries, and non-Name/Attribute bases used to be dropped.
    base_classes = [
        base.id if isinstance(base, ast.Name) else ast.unparse(base)
        for base in node.bases
    ]

    methods = [
        {
            "name": item.name,
            "params": [arg.arg for arg in item.args.args if arg.arg != "self"],
            "is_async": isinstance(item, ast.AsyncFunctionDef),
        }
        for item in node.body
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]

    return {
        "name": node.name,
        "base_classes": base_classes,
        "methods": methods,
        "complexity": estimate_complexity(node),
        "docstring": ast.get_docstring(node) or "",
    }
|
||
|
||
|
||
def extract_function_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> dict:
    """Extract function information from a function-definition AST node.

    Args:
        node: The (sync or async) function definition to describe.

    Returns:
        A dict with the keys ``name``, ``params`` (names of the regular
        positional parameters only), ``return_type`` (the return annotation
        as text, "" when there is none), ``is_async`` and ``complexity``
        (result of estimate_complexity()).
    """
    params = [arg.arg for arg in node.args.args]

    annotation = node.returns
    if not annotation:
        return_type = ""
    elif isinstance(annotation, ast.Name):
        return_type = annotation.id
    elif isinstance(annotation, ast.Constant):
        # Covers "-> None" and string annotations; str() keeps a quoted
        # annotation free of its surrounding quotes.
        return_type = str(annotation.value)
    else:
        # Render compound annotations (list[dict], X | None, ...) as source
        # text; ast.dump would put the raw AST repr into the summary.
        return_type = ast.unparse(annotation)

    return {
        "name": node.name,
        "params": params,
        "return_type": return_type,
        "is_async": isinstance(node, ast.AsyncFunctionDef),
        "complexity": estimate_complexity(node),
    }
|
||
|
||
|
||
def extract_imports(tree: ast.AST, file_path: str) -> list[dict]:
    """Extract project-internal imports from a module AST.

    Only imports whose top-level package is exactly ``agentkit`` or
    ``configs`` are reported. Relative imports (``from .x import y``) are
    resolved against the package that contains ``file_path``.

    Args:
        tree: Parsed module.
        file_path: Project-relative path of the module, e.g.
            ``"src/agentkit/core/react.py"``; a leading ``src`` component is
            not part of the package path.

    Returns:
        One dict per imported name, in ast.walk order, with ``from_module``
        (the absolute dotted module for ``from ... import``, ``None`` for a
        plain ``import``) and ``import_name``.
    """

    def _is_project_module(dotted: str) -> bool:
        # Compare the whole first component so that look-alike packages
        # such as "agentkit_extras" are not mistaken for project code.
        return dotted.split(".", 1)[0] in ("agentkit", "configs")

    # Dotted-name components of the package containing this file.
    path_parts = file_path.replace("\\", "/").split("/")
    if path_parts and path_parts[0] == "src":
        path_parts = path_parts[1:]
    package_parts = path_parts[:-1]

    imports = []
    for node in ast.walk(tree):
        if isinstance(node, ast.ImportFrom):
            if node.level:
                # level 1 is the containing package; each further dot
                # climbs one package higher.
                hops = node.level - 1
                if hops >= len(package_parts):
                    # Would climb above the top-level package: unresolvable.
                    continue
                anchor = package_parts[: len(package_parts) - hops]
                module = ".".join(anchor + ([node.module] if node.module else []))
            else:
                module = node.module or ""
            if module and _is_project_module(module):
                imports.extend(
                    {"from_module": module, "import_name": alias.name}
                    for alias in node.names
                )
        elif isinstance(node, ast.Import):
            imports.extend(
                {"from_module": None, "import_name": alias.name}
                for alias in node.names
                if _is_project_module(alias.name)
            )
    return imports
|
||
|
||
|
||
def module_to_file_path(module: str) -> str:
    """Convert a dotted Python module path to a project-relative file path.

    ``agentkit[.x.y]`` maps below ``src/agentkit`` and ``configs[.x.y]``
    below ``configs``. For a sub-module the package form
    (``x/y/__init__.py``) is tried first, then the plain module form
    (``x/y.py``); a candidate is returned only if it exists under
    PROJECT_ROOT. The bare top-level names map to their ``__init__.py``
    without an existence check.

    Args:
        module: Absolute dotted module name.

    Returns:
        The "/"-separated relative path, or "" when the module is not part
        of the project or no matching file exists.
    """
    parts = module.split(".")

    # Match the top-level package by its whole name; a startswith() test
    # would resolve e.g. "agentkit_extras" to src/agentkit/__init__.py.
    base_dirs = {"agentkit": "src/agentkit", "configs": "configs"}
    base_dir = base_dirs.get(parts[0])
    if base_dir is None:
        return ""

    sub_parts = parts[1:]
    if not sub_parts:
        return f"{base_dir}/__init__.py"

    stem = f"{base_dir}/{'/'.join(sub_parts)}"
    for candidate in (f"{stem}/__init__.py", f"{stem}.py"):
        if (PROJECT_ROOT / candidate).exists():
            return candidate

    return ""
|
||
|
||
|
||
def scan_file(file_path: Path) -> dict:
    """Scan a single Python file and extract its structure.

    Args:
        file_path: Path of the file; must be located under PROJECT_ROOT.

    Returns:
        A dict with the keys ``classes`` (extract_class_info() results for
        top-level classes), ``functions`` (extract_function_info() results
        for top-level functions), ``imports`` (extract_imports() results),
        ``rel_path`` ("/"-separated path relative to PROJECT_ROOT) and
        ``docstring`` (module docstring or ""). When the file cannot be read
        or parsed the lists are empty, and ``rel_path`` is still present so
        callers can key on it.

    Raises:
        ValueError: If ``file_path`` is not located under PROJECT_ROOT.
    """
    # Resolve the relative path first so the failure result carries it too;
    # as_posix() keeps "/" separators on every platform, which is what the
    # "src/agentkit/..." string handling elsewhere in this file expects.
    rel_path = file_path.relative_to(PROJECT_ROOT).as_posix()

    try:
        source = file_path.read_text(encoding="utf-8")
        tree = ast.parse(source)
    except (OSError, SyntaxError, ValueError) as exc:
        # ValueError covers UnicodeDecodeError and source containing null
        # bytes. Skipping is deliberate (best effort), but say so.
        print(f"Warning: skipping {rel_path}: {exc}", file=sys.stderr)
        return {
            "classes": [],
            "functions": [],
            "imports": [],
            "top_level_functions": [],
            "rel_path": rel_path,
            "docstring": "",
        }

    classes = []
    top_level_functions = []
    # Only direct children of the module: nested definitions are ignored.
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.ClassDef):
            classes.append(extract_class_info(node, rel_path))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            top_level_functions.append(extract_function_info(node))

    return {
        "classes": classes,
        "functions": top_level_functions,
        "imports": extract_imports(tree, rel_path),
        "rel_path": rel_path,
        "docstring": ast.get_docstring(tree) or "",
    }
|
||
|
||
|
||
def _current_git_commit(fallback: str) -> str:
    """Return the HEAD commit hash of the PROJECT_ROOT checkout, or ``fallback``."""
    # Local import: only this helper needs subprocess.
    import subprocess

    try:
        completed = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
            check=True,
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        # git missing, not a repository, directory absent, or timed out.
        return fallback
    return completed.stdout.strip() or fallback


def build_knowledge_graph():
    """Build the complete knowledge graph.

    Scans every ``*.py`` file below SCAN_DIRS and produces:
      * one node per file, per top-level class, per method and per
        top-level function;
      * "contains" edges (file -> class/function, class -> method),
        "extends" edges (class -> base class defined in the project) and
        "imports" edges (file -> imported project file);
      * the guided tours from build_tours().

    Returns:
        A JSON-serialisable dict with the keys ``version``, ``project``,
        ``nodes``, ``edges`` and ``tours``.

    Note:
        Class and function node ids are derived from bare names
        (``class:<Name>``, ``func:<name>``), so equally named definitions in
        different files share one id. The tours rely on this id scheme.
    """
    # Collect all Python files
    py_files = [
        py_file
        for scan_dir in SCAN_DIRS
        if scan_dir.exists()
        for py_file in scan_dir.rglob("*.py")
    ]
    print(f"Found {len(py_files)} Python files to scan")

    # Scan all files
    file_data = {}
    for py_file in sorted(py_files):
        data = scan_file(py_file)
        # A scan result for an unparsable file may lack "rel_path"; derive
        # it here instead of failing with KeyError.
        if "rel_path" not in data:
            data["rel_path"] = py_file.relative_to(PROJECT_ROOT).as_posix()
        file_data[data["rel_path"]] = data

    # Build nodes and edges
    nodes = []
    edges = []

    def add_edge(source, target, edge_type, label):
        # Sequential ids are unique and reproducible between runs; the
        # former 8-hex-digit random ids could collide and made every
        # regenerated graph differ.
        edges.append({
            "id": f"edge:{len(edges) + 1:08x}",
            "source": source,
            "target": target,
            "type": edge_type,
            "label": label,
        })

    # Track all node IDs for edge building
    file_node_ids = {}
    func_node_ids = {}
    # Register every class id up front so that "extends" edges do not depend
    # on the base class having been visited before its subclasses.
    class_node_ids = {
        cls["name"]: f"class:{cls['name']}"
        for data in file_data.values()
        for cls in data["classes"]
    }

    # 1. Create file nodes
    for rel_path, data in file_data.items():
        node_id = f"file:{rel_path}"
        # Tags are the directory components below src/agentkit.
        tags = [
            part
            for part in rel_path.replace("\\", "/").split("/")
            if part not in ("src", "agentkit", "__init__.py")
            and not part.endswith(".py")
        ]
        nodes.append({
            "id": node_id,
            "type": "file",
            "name": rel_path.split("/")[-1],
            "filePath": rel_path,
            "layer": get_layer(rel_path),
            "summary": get_file_summary(rel_path, data.get("docstring", "")),
            "tags": tags,
            "complexity": "moderate" if data["classes"] or data["functions"] else "simple",
        })
        file_node_ids[rel_path] = node_id

    # 2. Create class nodes
    for rel_path, data in file_data.items():
        layer = get_layer(rel_path)
        for cls in data["classes"]:
            class_id = class_node_ids[cls["name"]]
            method_names = [m["name"] for m in cls["methods"]]

            # Use docstring for summary if available
            docstring = cls.get("docstring", "")
            if docstring:
                # Take first line of docstring
                summary = docstring.strip().split("\n")[0].strip()
            else:
                # NOTE(review): the source this was restored from had lost
                # its indentation; the base-class/method suffixes are
                # assumed to belong to this generated fallback only.
                summary = f"{cls['name']}类"
                if cls["base_classes"]:
                    summary += f",继承自{', '.join(cls['base_classes'])}"
                if method_names:
                    summary += f",包含方法: {', '.join(method_names[:5])}"
                    if len(method_names) > 5:
                        summary += f" 等{len(method_names)}个方法"

            nodes.append({
                "id": class_id,
                "type": "class",
                "name": cls["name"],
                "filePath": rel_path,
                "layer": layer,
                "summary": summary,
                "tags": [cls["name"]],
                "complexity": cls["complexity"],
            })

            # Edge: file contains class
            add_edge(file_node_ids[rel_path], class_id, "contains", f"定义类 {cls['name']}")

            # Edge: class extends base classes (project classes only)
            for base in cls["base_classes"]:
                base_id = class_node_ids.get(base)
                # Skip self-references caused by a subclass reusing the
                # name of its imported base class.
                if base_id is not None and base_id != class_id:
                    add_edge(class_id, base_id, "extends", f"继承 {base}")

            # 3. Create method nodes
            for method in cls["methods"]:
                method_id = f"func:{cls['name']}.{method['name']}"
                async_tag = "异步" if method["is_async"] else ""
                summary = f"{cls['name']}.{method['name']}({', '.join(method['params'])}) {async_tag}方法"

                nodes.append({
                    "id": method_id,
                    "type": "function",
                    "name": method["name"],
                    "filePath": rel_path,
                    "layer": layer,
                    "summary": summary,
                    "tags": [cls["name"], method["name"]],
                    "complexity": "simple",
                })
                func_node_ids[f"{cls['name']}.{method['name']}"] = method_id

                # Edge: class contains method
                add_edge(class_id, method_id, "contains", f"方法 {method['name']}")

    # 4. Create top-level function nodes
    for rel_path, data in file_data.items():
        for func in data["functions"]:
            func_id = f"func:{func['name']}"
            async_tag = "异步" if func["is_async"] else ""
            summary = f"{func['name']}({', '.join(func['params'])}) {async_tag}函数"
            if func["return_type"]:
                summary += f" → {func['return_type']}"

            nodes.append({
                "id": func_id,
                "type": "function",
                "name": func["name"],
                "filePath": rel_path,
                "layer": get_layer(rel_path),
                "summary": summary,
                "tags": [func["name"]],
                "complexity": func["complexity"],
            })
            func_node_ids[func["name"]] = func_id

            # Edge: file contains function
            add_edge(file_node_ids[rel_path], func_id, "contains", f"定义函数 {func['name']}")

    # 5. Create import edges
    for rel_path, data in file_data.items():
        for imp in data["imports"]:
            # "from m import n" records m in from_module; a plain
            # "import m" records m in import_name and leaves from_module
            # empty, and used to produce no edge at all.
            module_name = imp["from_module"] or imp["import_name"]
            target_path = module_to_file_path(module_name)
            if target_path and target_path in file_node_ids:
                add_edge(
                    file_node_ids[rel_path],
                    file_node_ids[target_path],
                    "imports",
                    f"导入 {imp['import_name']}",
                )

    # 6. Build tours
    tours = build_tours(file_data, file_node_ids, class_node_ids, func_node_ids)

    # Get git commit hash: ask git for the checkout's HEAD and fall back to
    # the previously recorded commit when that is not possible.
    git_hash = _current_git_commit("d9d1b16e5911ad958cd8ae38958058bea13f3fcc")

    # Build final JSON
    graph = {
        "version": "1.0.0",
        "project": {
            "name": "Fischer AgentKit",
            "languages": ["python"],
            "frameworks": ["FastAPI", "Pydantic", "SQLAlchemy", "Typer", "Redis"],
            "description": "AI驱动的Agent框架,支持ReAct引擎、多LLM网关、Pipeline编排、自适应反思和消息总线",
            "analyzedAt": datetime.now(timezone.utc).isoformat(),
            "gitCommitHash": git_hash,
        },
        "nodes": nodes,
        "edges": edges,
        "tours": tours,
    }

    return graph
|
||
|
||
|
||
def build_tours(file_data, file_node_ids, class_node_ids, func_node_ids):
    """Return the list of guided learning tours.

    Each tour is a dict with ``id``, ``name``, ``description`` and ``steps``;
    each step is a dict with the ``nodeId`` to visit and ``why`` it matters.
    The tours are a fixed, hand-written table: none of the four arguments is
    consulted, and the node ids are not checked against the scanned graph.
    """

    def _step(node_id, why):
        return {"nodeId": node_id, "why": why}

    def _tour(tour_id, name, description, *steps):
        return {
            "id": tour_id,
            "name": name,
            "description": description,
            "steps": [_step(node_id, why) for node_id, why in steps],
        }

    return [
        # Tour 1: Entry Points
        _tour(
            "tour:entry-points",
            "入口点导览",
            "从项目入口开始,了解如何启动和使用AgentKit",
            ("file:src/agentkit/__main__.py", "Python模块入口,python -m agentkit"),
            ("file:src/agentkit/__init__.py", "包入口,导出核心公共API"),
            ("file:src/agentkit/cli/main.py", "CLI主入口,Typer应用定义"),
            ("file:src/agentkit/server/app.py", "HTTP服务入口,FastAPI应用创建"),
        ),
        # Tour 2: Core Agent Lifecycle
        _tour(
            "tour:agent-lifecycle",
            "Agent生命周期导览",
            "深入理解Agent从创建到执行任务的完整生命周期",
            ("class:BaseAgent", "Agent基类,定义标准生命周期和可插拔能力"),
            ("func:BaseAgent.start", "Agent启动流程:连接Redis→注册→心跳→监听"),
            ("func:BaseAgent.execute", "任务执行框架方法:on_task_start→handle_task→quality_gate→on_task_complete"),
            ("func:BaseAgent.handle_task", "抽象方法,子类实现业务逻辑"),
            ("class:ConfigDrivenAgent", "配置驱动Agent,从YAML自动组装"),
            ("func:ConfigDrivenAgent.handle_task", "根据execution_mode路由到react/direct/custom模式"),
            ("class:AgentConfig", "Agent配置模型,支持YAML/Dict构建"),
        ),
        # Tour 3: ReAct Engine
        _tour(
            "tour:react-engine",
            "ReAct引擎导览",
            "理解ReAct推理-行动循环的核心实现",
            ("class:ReActEngine", "ReAct引擎核心,Think→Act→Observe循环"),
            ("func:ReActEngine.execute", "执行ReAct循环,支持超时和取消"),
            ("func:ReActEngine.execute_stream", "流式执行,逐步yield事件"),
            ("func:ReActEngine._execute_tool", "工具调用执行,处理成功和失败"),
            ("func:ReActEngine._parse_text_tool_calls", "文本解析模式,支持Action和代码块格式"),
            ("class:ReActStep", "单步记录数据结构"),
            ("class:ReActResult", "ReAct执行结果数据结构"),
            ("class:ReActEvent", "流式执行事件数据结构"),
        ),
        # Tour 4: LLM Gateway
        _tour(
            "tour:llm-gateway",
            "LLM网关导览",
            "了解多Provider统一网关的设计和实现",
            ("class:LLMGateway", "LLM网关核心,统一多Provider调用接口"),
            ("file:src/agentkit/llm/protocol.py", "LLM协议定义,LLMProvider/LLMRequest/LLMResponse"),
            ("file:src/agentkit/llm/config.py", "模型别名和Provider配置"),
            ("file:src/agentkit/llm/providers/openai.py", "OpenAI Provider实现"),
            ("file:src/agentkit/llm/providers/anthropic.py", "Anthropic Provider实现"),
            ("file:src/agentkit/llm/retry.py", "LLM重试策略"),
        ),
        # Tour 5: Memory System
        _tour(
            "tour:memory-system",
            "记忆系统导览",
            "理解多层记忆系统的架构和实现",
            ("file:src/agentkit/memory/base.py", "记忆基类接口定义"),
            ("file:src/agentkit/memory/retriever.py", "统一记忆检索器,整合工作/情景/语义记忆"),
            ("file:src/agentkit/memory/working.py", "工作记忆 - 基于Redis的短期记忆"),
            ("file:src/agentkit/memory/episodic.py", "情景记忆 - 基于向量的长期记忆"),
            ("file:src/agentkit/memory/semantic.py", "语义记忆 - RAG服务集成"),
            ("file:src/agentkit/memory/embedder.py", "文本向量化嵌入器"),
        ),
        # Tour 6: Orchestration
        _tour(
            "tour:orchestration",
            "编排系统导览",
            "了解多Agent协作编排和Pipeline引擎",
            ("class:Orchestrator", "多Agent协作编排器,Orchestrator-Worker模式"),
            ("func:Orchestrator.execute", "编排执行:分解→执行→汇总"),
            ("func:Orchestrator.execute_adaptive", "自适应编排:执行→评估→再分解循环"),
            ("file:src/agentkit/orchestrator/pipeline_engine.py", "Pipeline引擎,执行DAG工作流"),
            ("file:src/agentkit/orchestrator/pipeline_schema.py", "Pipeline配置模型"),
            ("file:src/agentkit/orchestrator/reflection.py", "执行后反思模块"),
        ),
        # Tour 7: Skills & Router
        _tour(
            "tour:skills-router",
            "技能与路由导览",
            "了解技能定义、注册和意图路由机制",
            ("file:src/agentkit/skills/base.py", "技能基类和配置定义"),
            ("class:SkillRegistry", "技能注册中心"),
            ("file:src/agentkit/skills/loader.py", "从YAML加载技能定义"),
            ("class:IntentRouter", "意图路由器,匹配用户输入到技能"),
            ("file:src/agentkit/router/intent.py", "意图路由实现"),
        ),
        # Tour 8: Evolution
        _tour(
            "tour:evolution",
            "进化系统导览",
            "了解Agent自我进化的机制和实现",
            ("file:src/agentkit/evolution/lifecycle.py", "进化生命周期Mixin"),
            ("file:src/agentkit/evolution/reflector.py", "反思器 - 分析结果生成改进建议"),
            ("file:src/agentkit/evolution/prompt_optimizer.py", "Prompt自动优化"),
            ("file:src/agentkit/evolution/genetic.py", "遗传算法进化"),
            ("file:src/agentkit/evolution/ab_tester.py", "A/B测试对比"),
        ),
        # Tour 9: Infrastructure
        _tour(
            "tour:infrastructure",
            "基础设施导览",
            "了解消息总线、会话管理、遥测等基础设施",
            ("file:src/agentkit/bus/protocol.py", "消息总线协议接口"),
            ("file:src/agentkit/bus/redis_bus.py", "Redis Pub/Sub消息总线"),
            ("file:src/agentkit/bus/memory_bus.py", "进程内消息总线"),
            ("file:src/agentkit/session/manager.py", "会话管理器"),
            ("file:src/agentkit/telemetry/tracing.py", "OpenTelemetry追踪集成"),
            ("file:src/agentkit/telemetry/metrics.py", "运行指标收集"),
        ),
    ]
|
||
|
||
|
||
def main():
    """Build the knowledge graph, write it to OUTPUT_PATH and print statistics."""
    print("Building knowledge graph for Fischer AgentKit...")
    graph = build_knowledge_graph()

    # The output directory may not exist on a first run.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with OUTPUT_PATH.open("w", encoding="utf-8") as out_file:
        # ensure_ascii=False keeps the Chinese summaries readable in the file.
        json.dump(graph, out_file, ensure_ascii=False, indent=2)

    print(f"Knowledge graph written to {OUTPUT_PATH}")
    for section in ("Nodes", "Edges", "Tours"):
        print(f" {section}: {len(graph[section.lower()])}")

    def _print_distribution(heading, field):
        # Tally the nodes by the given field, then list the values sorted.
        tally = {}
        for entry in graph["nodes"]:
            value = entry[field]
            tally[value] = tally.get(value, 0) + 1
        print(heading)
        for value, total in sorted(tally.items()):
            print(f" {value}: {total} nodes")

    _print_distribution("\nLayer distribution:", "layer")
    _print_distribution("\nNode type distribution:", "type")


if __name__ == "__main__":
    main()
|