#!/usr/bin/env python3 """Knowledge Graph Builder for Fischer AgentKit Scans all Python source files under src/agentkit/ and configs/, extracts classes, functions, imports, and builds a comprehensive knowledge graph JSON file. """ import ast import json import os import sys import uuid from datetime import datetime, timezone from pathlib import Path from typing import Any # Project root PROJECT_ROOT = Path("/Users/Chiguyong/Code/Fischer/fischer-agentkit") OUTPUT_PATH = PROJECT_ROOT / ".understand-anything" / "knowledge-graph.json" # Directories to scan SCAN_DIRS = [ PROJECT_ROOT / "src" / "agentkit", PROJECT_ROOT / "configs", ] # Architecture layer mapping LAYER_MAP = { "server": "api", "cli": "api", "core": "service", "orchestrator": "service", "skills": "service", "router": "service", "memory": "data", "session": "data", "bus": "data", "llm": "utility", "mcp": "utility", "tools": "utility", "telemetry": "utility", "prompts": "utility", "quality": "utility", "evaluation": "utility", "evolution": "utility", "configs": "utility", } # Chinese summaries for modules MODULE_SUMMARIES = { "core": "核心模块 - 定义Agent基类、通信协议、ReAct引擎、任务分发、注册中心等基础组件", "core.base": "Agent基类 - 统一Agent生命周期管理,包括启动、停止、任务执行、Handoff、进度上报", "core.protocol": "通信协议定义 - 统一消息格式,包括TaskMessage、TaskResult、TaskProgress、HandoffMessage等", "core.react": "ReAct推理-行动循环引擎 - 实现Think→Act→Observe循环,支持工具调用和文本解析模式", "core.exceptions": "自定义异常体系 - 定义Agent框架所有异常类型", "core.dispatcher": "任务分发器 - 通过Redis Queue将任务分发给Agent,支持回调、重试、进度上报", "core.registry": "Agent注册中心 - 管理Agent的注册、发现、状态、心跳和负载均衡", "core.config_driven": "配置驱动Agent - 从YAML/Dict配置自动组装Agent,支持llm_generate/tool_call/custom三种模式", "core.compressor": "上下文压缩器 - 长会话自动压缩历史消息,支持LLM摘要和简单截断策略", "core.trace": "执行轨迹记录器 - 记录ReAct执行过程中的完整轨迹,为反思和可观测性提供数据", "core.shared_workspace": "共享工作空间 - 基于Redis的Agent间共享状态存储,支持读写、锁操作", "core.agent_pool": "Agent实例池 - 运行时管理Agent的创建、获取、删除", "core.orchestrator": "多Agent协作编排器 - 实现Orchestrator-Worker模式,支持任务分解、并行执行、自适应编排", "core.headroom_compressor": "Headroom AI压缩器 - 基于Headroom AI的上下文压缩实现", "core.logging": "日志配置 - 统一日志格式和配置", "core.standalone": "独立运行模式 - 支持Agent脱离框架独立运行", "core.goal_planner": "目标规划器 - 将复杂目标分解为可执行步骤", "core.plan_checker": "计划检查器 - 验证执行计划的完整性和可行性", "core.plan_exec_engine": "计划执行引擎 - 执行分解后的计划步骤", "core.plan_executor": "计划执行器 - 管理计划执行的完整流程", "core.plan_schema": "计划Schema - 执行计划的数据结构定义", "core.reflexion": "Reflexion引擎 - 自反思推理,通过自我评估改进输出", "core.rewoo": "ReWOO引擎 - 无观察推理,先规划后执行的高效模式", "llm": "LLM网关模块 - 多Provider统一网关,支持OpenAI/Anthropic/Gemini/文心/豆包/元宝等", "llm.gateway": "LLM网关 - 统一多Provider调用接口,支持路由、重试、流式输出", "llm.protocol": "LLM协议定义 - 定义LLMProvider、LLMRequest、LLMResponse等接口", "llm.config": "LLM配置 - 模型别名、Provider配置管理", "llm.retry": "LLM重试策略 - 指数退避重试和错误处理", "llm.providers": "LLM Provider实现 - 各大模型服务商的具体适配器", "llm.providers.openai": "OpenAI Provider - 支持GPT-4/GPT-3.5等模型", "llm.providers.anthropic": "Anthropic Provider - 支持Claude系列模型", "llm.providers.gemini": "Gemini Provider - 支持Google Gemini模型", "llm.providers.wenxin": "文心一言Provider - 支持百度文心大模型", "llm.providers.doubao": "豆包Provider - 支持字节豆包大模型", "llm.providers.yuanbao": "元宝Provider - 支持腾讯元宝大模型", "llm.providers.tracker": "LLM调用追踪器 - 记录和统计LLM调用", "llm.providers.usage_store": "LLM用量存储 - Token用量和成本追踪,支持InMemory和Redis后端", "llm.cache": "LLM响应缓存 - 基于语义相似度的LLM响应缓存,减少重复调用", "llm.cache_key": "缓存键生成 - LLM缓存键的计算和归一化", "chat": "聊天路由模块 - CostAwareRouter三层意图路由和语义路由", "chat.skill_routing": "三层意图路由 - CostAwareRouter,正则→启发式→LLM分类逐层升级", "chat.semantic_router": "语义路由 - 基于向量相似度的意图路由,支持语义匹配", "quality.cascade_detector": "级联检测器 - 检测Agent输出中的级联失败模式", "quality.cascade_state_store": "级联状态存储 - 级联检测状态持久化,支持InMemory和Redis后端", "quality.alignment": "对齐守卫 - 检测和修正Agent输出中的对齐偏差", "tools": "工具模块 - 提供Agent可调用的各类工具", "tools.base": "工具基类 - 定义Tool接口和标准执行流程", "tools.registry": "工具注册中心 - 管理工具的注册、发现、获取", "tools.shell": "Shell工具 - 执行系统命令", "tools.web_search": "Web搜索工具 - 执行网络搜索", "tools.web_crawl": "Web爬取工具 - 爬取网页内容", "tools.memory_tool": "记忆工具 - Agent记忆读写操作", "tools.ask_human": "人工介入工具 - 请求人类输入", "tools.schema_tools": "Schema工具 - JSON Schema相关操作", "tools.function_tool": "函数工具 - 将Python函数包装为Tool", "tools.agent_tool": "Agent工具 - 将Agent包装为可调用Tool", "tools.mcp_tool": "MCP工具 - MCP协议工具适配器", "tools.composition": "工具组合 - 支持工具链式组合", "tools.baidu_search": "百度搜索工具 - 百度搜索引擎集成", "tools.headroom_retrieve": "Headroom检索工具 - Headroom AI知识检索", "tools.computer_use": "计算机使用工具 - 桌面操控工具,支持截图、点击、输入等操作", "tools.computer_use_session": "计算机使用会话 - 桌面操控会话管理,支持云端和本地(pyautogui)模式", "tools.computer_use_recorder": "计算机使用录制器 - 记录桌面操控动作序列", "tools.pty_session": "PTY会话 - 伪终端会话管理", "tools.terminal_session": "终端会话 - 终端模拟器会话", "tools.output_parser": "输出解析器 - 解析Agent输出为结构化数据", "tools.skill_install": "技能安装器 - 动态安装技能包", "memory": "记忆模块 - 多层记忆系统,支持工作记忆、情景记忆、语义记忆", "memory.base": "记忆基类 - 定义Memory接口", "memory.working": "工作记忆 - 基于Redis的短期工作记忆", "memory.episodic": "情景记忆 - 基于向量数据库的长期情景记忆", "memory.semantic": "语义记忆 - 基于RAG服务的语义知识检索", "memory.profile": "用户画像 - 用户偏好和历史信息管理", "memory.retriever": "记忆检索器 - 统一多层记忆检索接口", "memory.embedder": "嵌入器 - 文本向量化,支持OpenAI Embedding", "memory.models": "记忆数据模型 - Pydantic模型定义", "memory.rag_loop": "RAG循环 - 检索增强生成的迭代循环", "memory.query_transformer": "查询转换器 - 优化检索查询", "memory.relevance_scorer": "相关性评分器 - 评估检索结果相关性", "memory.contextual_retrieval": "上下文检索 - 基于上下文的检索增强", "memory.http_rag": "HTTP RAG服务 - 远程RAG API客户端", "skills": "技能模块 - 定义可复用的Agent技能,包含意图、工具和质量门控", "skills.base": "技能基类 - 定义Skill、SkillConfig、IntentConfig等", "skills.registry": "技能注册中心 - 管理技能的注册、发现、获取", "skills.loader": "技能加载器 - 从YAML配置加载技能定义", "skills.pipeline": "技能Pipeline - 技能编排流程", "skills.skill_md": "Markdown技能 - 从Markdown文档生成技能", "skills.geo_pipeline": "GEO Pipeline - 地理信息处理Pipeline", "orchestrator": "编排模块 - Pipeline编排引擎,支持DAG工作流", "orchestrator.pipeline_engine": "Pipeline引擎 - 执行DAG定义的工作流", "orchestrator.pipeline_schema": "Pipeline Schema - Pipeline配置模型定义", "orchestrator.pipeline_state": "Pipeline状态 - Pipeline执行状态管理", "orchestrator.pipeline_models": "Pipeline模型 - Pipeline数据模型", "orchestrator.pipeline_loader": "Pipeline加载器 - 从YAML加载Pipeline定义", "orchestrator.reflection": "反思模块 - 执行后反思和改进", "orchestrator.retry": "重试策略 - Pipeline步骤重试机制", "orchestrator.compensation": "补偿机制 - Pipeline失败时的补偿操作", "orchestrator.handoff": "Handoff - Agent间任务转交", "orchestrator.dynamic_pipeline": "动态Pipeline - 运行时动态构建Pipeline", "router": "路由模块 - 意图路由,将用户输入匹配到对应技能", "router.intent": "意图路由器 - 基于LLM的意图识别和路由", "quality": "质量模块 - 输出质量门控和标准化", "quality.gate": "质量门控 - 检查Agent输出是否满足质量要求", "quality.output": "输出标准化 - 统一Agent输出格式", "prompts": "Prompt模块 - Prompt模板和渲染", "prompts.template": "Prompt模板 - 支持变量替换和Section组合", "prompts.section": "Prompt Section - 定义Prompt的各组成部分", "bus": "消息总线模块 - Agent间异步通信", "bus.protocol": "总线协议 - 定义消息总线接口", "bus.message": "消息定义 - Agent间通信消息格式", "bus.memory_bus": "内存消息总线 - 基于进程内队列的消息总线", "bus.redis_bus": "Redis消息总线 - 基于Redis Pub/Sub的消息总线", "session": "会话模块 - 会话管理和持久化", "session.manager": "会话管理器 - 管理对话会话的创建、获取、更新", "session.store": "会话存储 - 会话数据的持久化存储", "session.models": "会话模型 - 会话相关的数据模型", "server": "服务器模块 - FastAPI HTTP/WebSocket服务", "server.app": "FastAPI应用 - 创建和配置FastAPI应用实例", "server.config": "服务器配置 - 服务器运行参数配置", "server.runner": "服务器运行器 - 启动和管理服务器进程", "server.middleware": "中间件 - 请求处理中间件", "server.client": "API客户端 - 服务端API客户端封装", "server.client_config": "客户端配置 - API客户端配置管理", "server.task_store": "任务存储 - 服务端任务状态存储", "server.routes": "路由模块 - HTTP/WebSocket路由定义", "server.routes.chat": "聊天路由 - 对话API端点", "server.routes.ws": "WebSocket路由 - 实时通信端点", "server.routes.tasks": "任务路由 - 任务管理API", "server.routes.agents": "Agent路由 - Agent管理API", "server.routes.skills": "技能路由 - 技能管理API,含@-mention建议端点", "server.routes.memory": "记忆路由 - 记忆管理API", "server.routes.llm": "LLM路由 - LLM配置和调用API", "server.routes.health": "健康检查路由 - 服务健康状态端点", "server.routes.metrics": "指标路由 - 运行指标API", "server.routes.evolution": "进化路由 - Agent进化管理API", "server.routes.evolution_dashboard": "进化仪表盘路由 - 进化数据可视化API", "server.routes.kb_management": "知识库管理路由 - 文档上传/搜索/源配置API", "server.routes.settings": "设置路由 - 系统配置管理API", "server.routes.terminal": "终端路由 - PTY终端会话API", "server.routes.workflows": "工作流路由 - Pipeline工作流管理API", "server.routes.skill_management": "技能管理路由 - 技能CRUD操作API", "server.routes.portal": "门户路由 - Web GUI入口和静态资源", "cli": "命令行模块 - CLI工具", "cli.main": "CLI入口 - Typer应用主入口", "cli.chat": "聊天命令 - 交互式对话命令", "cli.init": "初始化命令 - 项目初始化", "cli.onboarding": "引导命令 - 新用户引导流程", "cli.skill": "技能命令 - 技能管理CLI", "cli.task": "任务命令 - 任务提交和管理CLI", "cli.pair": "配对命令 - Agent配对", "cli.usage": "使用统计命令 - 使用情况统计", "cli.templates": "模板命令 - Agent模板管理", "mcp": "MCP协议模块 - Model Context Protocol集成", "mcp.client": "MCP客户端 - 连接MCP服务器", "mcp.server": "MCP服务器 - 提供MCP服务", "mcp.manager": "MCP管理器 - 管理MCP连接", "mcp.transport": "MCP传输层 - MCP通信传输实现", "telemetry": "遥测模块 - 可观测性支持", "telemetry.tracing": "分布式追踪 - OpenTelemetry追踪集成", "telemetry.metrics": "指标收集 - 运行指标收集和导出", "telemetry.setup": "遥测设置 - 初始化遥测组件", "evolution": "进化模块 - Agent自我进化能力", "evolution.lifecycle": "进化生命周期 - EvolutionMixin,任务后触发进化", "evolution.reflector": "反思器 - 分析任务执行结果,生成改进建议", "evolution.llm_reflector": "LLM反思器 - 使用LLM进行深度反思", "evolution.prompt_optimizer": "Prompt优化器 - 自动优化Agent Prompt", "evolution.strategy_tuner": "策略调优器 - 调整Agent执行策略", "evolution.genetic": "遗传算法 - 基于遗传算法的Prompt进化", "evolution.fitness": "适应度评估 - 评估进化变体的质量", "evolution.ab_tester": "A/B测试 - 对比测试不同进化变体", "evolution.evolution_store": "进化存储 - 持久化进化历史", "evolution.models": "进化模型 - 进化相关数据模型", "evolution.experience_schema": "经验Schema - 经验数据结构定义", "evolution.experience_store": "经验存储 - 成功/失败经验持久化", "evolution.path_optimizer": "路径优化器 - 分析工具调用路径,推荐更优策略", "evolution.pitfall_detector": "陷阱检测器 - 检测重复错误模式", "evaluation": "评估模块 - Agent输出质量评估", "evaluation.ragas_evaluator": "RAGAS评估器 - 使用RAGAS框架评估RAG质量", "org": "组织发现模块 - 多Agent组织架构和协作发现", "org.context": "组织上下文 - 组织级别的共享上下文管理", "org.discovery": "组织发现 - Agent间能力发现和协作匹配", "marketplace": "多Agent市场模块 - Agent间的拍卖和财富分配", "marketplace.auction": "拍卖机制 - Agent间的任务拍卖和竞价", "marketplace.wealth": "财富管理 - Agent间的价值交换和分配", "configs": "配置模块 - Pipeline和技能YAML配置", "configs.geo_server": "GEO服务器 - 地理信息HTTP服务", "configs.geo_handlers": "GEO处理器 - 地理信息请求处理", "configs.geo_tools": "GEO工具 - 地理信息相关工具定义", } def get_layer(file_path: str) -> str: """Determine architecture layer from file path.""" parts = file_path.replace("\\", "/").split("/") # Check for configs/ prefix if "configs" in parts: return "utility" # For src/agentkit/__init__.py and __main__.py, treat as service if parts[-1] in ("__init__.py", "__main__.py") and len(parts) <= 4: return "service" for part in parts: if part in LAYER_MAP: return LAYER_MAP[part] return "unknown" def get_module_key(file_path: str) -> str: """Get module key for summary lookup.""" # Convert file path to module key rel = file_path if rel.startswith("src/agentkit/"): rel = rel[len("src/agentkit/"):] elif rel.startswith("configs/"): rel = rel[len("configs/"):] # Remove __init__.py and .py suffix rel = rel.replace("/__init__.py", "").replace(".py", "") return rel def get_file_summary(file_path: str, docstring: str = "") -> str: """Get Chinese summary for a file.""" # If we have a docstring, use it as base if docstring: # Clean up docstring doc = docstring.strip().split("\n")[0].strip() if doc: return doc key = get_module_key(file_path) # Try exact match first if key in MODULE_SUMMARIES: return MODULE_SUMMARIES[key] # Try parent module parts = key.split("/") for i in range(len(parts) - 1, 0, -1): parent_key = "/".join(parts[:i]) if parent_key in MODULE_SUMMARIES: return MODULE_SUMMARIES[parent_key] return f"模块 {key}" def estimate_complexity(node: ast.AST) -> str: """Estimate complexity of an AST node.""" if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): # Count branches, loops, nested functions complexity = 1 for child in ast.walk(node): if isinstance(child, (ast.If, ast.While, ast.For, ast.ExceptHandler)): complexity += 1 elif isinstance(child, (ast.And, ast.Or)): complexity += 1 if complexity <= 3: return "simple" elif complexity <= 8: return "moderate" return "complex" elif isinstance(node, ast.ClassDef): methods = [n for n in node.body if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))] if len(methods) <= 3: return "simple" elif len(methods) <= 8: return "moderate" return "complex" return "simple" def extract_class_info(node: ast.ClassDef, file_path: str) -> dict: """Extract class information from AST node.""" base_classes = [] for base in node.bases: if isinstance(base, ast.Name): base_classes.append(base.id) elif isinstance(base, ast.Attribute): base_classes.append(ast.dump(base)) methods = [] for item in node.body: if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)): params = [arg.arg for arg in item.args.args if arg.arg != "self"] methods.append({ "name": item.name, "params": params, "is_async": isinstance(item, ast.AsyncFunctionDef), }) # Extract class docstring docstring = ast.get_docstring(node) or "" return { "name": node.name, "base_classes": base_classes, "methods": methods, "complexity": estimate_complexity(node), "docstring": docstring, } def extract_function_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> dict: """Extract function information from AST node.""" params = [arg.arg for arg in node.args.args] return_type = "" if node.returns: if isinstance(node.returns, ast.Name): return_type = node.returns.id elif isinstance(node.returns, ast.Constant): return_type = str(node.returns.value) else: return_type = ast.dump(node.returns) return { "name": node.name, "params": params, "return_type": return_type, "is_async": isinstance(node, ast.AsyncFunctionDef), "complexity": estimate_complexity(node), } def extract_imports(tree: ast.AST, file_path: str) -> list[dict]: """Extract import information from AST.""" imports = [] for node in ast.walk(tree): if isinstance(node, ast.ImportFrom): if node.module and (node.module.startswith("agentkit") or node.module.startswith("configs")): for alias in node.names: imports.append({ "from_module": node.module, "import_name": alias.name, }) elif isinstance(node, ast.Import): for alias in node.names: if alias.name.startswith("agentkit") or alias.name.startswith("configs"): imports.append({ "from_module": None, "import_name": alias.name, }) return imports def module_to_file_path(module: str) -> str: """Convert Python module path to file path.""" parts = module.split(".") # Handle agentkit modules if module.startswith("agentkit"): # Skip "agentkit" prefix, it's under src/ sub_parts = parts[1:] # skip "agentkit" if not sub_parts: return "src/agentkit/__init__.py" # Try as package __init__.py init_path = PROJECT_ROOT / "src" / "agentkit" / "/".join(sub_parts) / "__init__.py" if init_path.exists(): return f"src/agentkit/{'/'.join(sub_parts)}/__init__.py" # Try as module.py mod_path = PROJECT_ROOT / "src" / "agentkit" / ("/".join(sub_parts) + ".py") if mod_path.exists(): return f"src/agentkit/{'/'.join(sub_parts)}.py" # Handle configs modules if module.startswith("configs"): sub_parts = parts[1:] # skip "configs" if not sub_parts: return "configs/__init__.py" mod_path = PROJECT_ROOT / "configs" / ("/".join(sub_parts) + ".py") if mod_path.exists(): return f"configs/{'/'.join(sub_parts)}.py" return "" def scan_file(file_path: Path) -> dict: """Scan a single Python file and extract all information.""" try: source = file_path.read_text(encoding="utf-8") tree = ast.parse(source) except (SyntaxError, UnicodeDecodeError): return {"classes": [], "functions": [], "imports": [], "top_level_functions": [], "docstring": ""} rel_path = str(file_path.relative_to(PROJECT_ROOT)) # Extract module docstring docstring = ast.get_docstring(tree) or "" classes = [] functions = [] top_level_functions = [] for node in ast.iter_child_nodes(tree): if isinstance(node, ast.ClassDef): classes.append(extract_class_info(node, rel_path)) elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): func_info = extract_function_info(node) functions.append(func_info) top_level_functions.append(func_info) imports = extract_imports(tree, rel_path) return { "classes": classes, "functions": top_level_functions, "imports": imports, "rel_path": rel_path, "docstring": docstring, } def build_knowledge_graph(): """Build the complete knowledge graph.""" # Collect all Python files py_files = [] for scan_dir in SCAN_DIRS: if scan_dir.exists(): for py_file in scan_dir.rglob("*.py"): py_files.append(py_file) print(f"Found {len(py_files)} Python files to scan") # Scan all files file_data = {} for py_file in sorted(py_files): data = scan_file(py_file) rel_path = data["rel_path"] file_data[rel_path] = data # Build nodes and edges nodes = [] edges = [] # Track all node IDs for edge building file_node_ids = {} class_node_ids = {} func_node_ids = {} # 1. Create file nodes for rel_path, data in file_data.items(): node_id = f"file:{rel_path}" layer = get_layer(rel_path) summary = get_file_summary(rel_path, data.get("docstring", "")) tags = [] parts = rel_path.replace("\\", "/").split("/") for p in parts: if p not in ("src", "agentkit", "__init__.py") and not p.endswith(".py"): tags.append(p) nodes.append({ "id": node_id, "type": "file", "name": rel_path.split("/")[-1], "filePath": rel_path, "layer": layer, "summary": summary, "tags": tags, "complexity": "moderate" if data["classes"] or data["functions"] else "simple", }) file_node_ids[rel_path] = node_id # 2. Create class nodes for rel_path, data in file_data.items(): for cls in data["classes"]: class_id = f"class:{cls['name']}" layer = get_layer(rel_path) method_names = [m["name"] for m in cls["methods"]] # Use docstring for summary if available docstring = cls.get("docstring", "") if docstring: # Take first line of docstring summary = docstring.strip().split("\n")[0].strip() else: summary = f"{cls['name']}类" if cls["base_classes"]: summary += f",继承自{', '.join(cls['base_classes'])}" if method_names: summary += f",包含方法: {', '.join(method_names[:5])}" if len(method_names) > 5: summary += f" 等{len(method_names)}个方法" nodes.append({ "id": class_id, "type": "class", "name": cls["name"], "filePath": rel_path, "layer": layer, "summary": summary, "tags": [cls["name"]], "complexity": cls["complexity"], }) class_node_ids[cls["name"]] = class_id # Edge: file contains class edges.append({ "id": f"edge:{uuid.uuid4().hex[:8]}", "source": file_node_ids[rel_path], "target": class_id, "type": "contains", "label": f"定义类 {cls['name']}", }) # Edge: class extends base classes for base in cls["base_classes"]: if base in class_node_ids: edges.append({ "id": f"edge:{uuid.uuid4().hex[:8]}", "source": class_id, "target": class_node_ids[base], "type": "extends", "label": f"继承 {base}", }) # 3. Create method nodes for method in cls["methods"]: method_id = f"func:{cls['name']}.{method['name']}" async_tag = "异步" if method["is_async"] else "" summary = f"{cls['name']}.{method['name']}({', '.join(method['params'])}) {async_tag}方法" nodes.append({ "id": method_id, "type": "function", "name": method["name"], "filePath": rel_path, "layer": layer, "summary": summary, "tags": [cls["name"], method["name"]], "complexity": "simple", }) func_node_ids[f"{cls['name']}.{method['name']}"] = method_id # Edge: class contains method edges.append({ "id": f"edge:{uuid.uuid4().hex[:8]}", "source": class_id, "target": method_id, "type": "contains", "label": f"方法 {method['name']}", }) # 4. Create top-level function nodes for rel_path, data in file_data.items(): for func in data["functions"]: func_id = f"func:{func['name']}" async_tag = "异步" if func["is_async"] else "" summary = f"{func['name']}({', '.join(func['params'])}) {async_tag}函数" if func["return_type"]: summary += f" → {func['return_type']}" nodes.append({ "id": func_id, "type": "function", "name": func["name"], "filePath": rel_path, "layer": get_layer(rel_path), "summary": summary, "tags": [func["name"]], "complexity": func["complexity"], }) func_node_ids[func["name"]] = func_id # Edge: file contains function edges.append({ "id": f"edge:{uuid.uuid4().hex[:8]}", "source": file_node_ids[rel_path], "target": func_id, "type": "contains", "label": f"定义函数 {func['name']}", }) # 5. Create import edges for rel_path, data in file_data.items(): for imp in data["imports"]: if imp["from_module"]: target_path = module_to_file_path(imp["from_module"]) if target_path and target_path in file_node_ids: edges.append({ "id": f"edge:{uuid.uuid4().hex[:8]}", "source": file_node_ids[rel_path], "target": file_node_ids[target_path], "type": "imports", "label": f"导入 {imp['import_name']}", }) # 6. Build tours tours = build_tours(file_data, file_node_ids, class_node_ids, func_node_ids) # Get git commit hash git_hash = "d9d1b16e5911ad958cd8ae38958058bea13f3fcc" # Build final JSON graph = { "version": "1.0.0", "project": { "name": "Fischer AgentKit", "languages": ["python"], "frameworks": ["FastAPI", "Pydantic", "SQLAlchemy", "Typer", "Redis"], "description": "AI驱动的Agent框架,支持ReAct引擎、多LLM网关、Pipeline编排、自适应反思和消息总线", "analyzedAt": datetime.now(timezone.utc).isoformat(), "gitCommitHash": git_hash, }, "nodes": nodes, "edges": edges, "tours": tours, } return graph def build_tours(file_data, file_node_ids, class_node_ids, func_node_ids): """Build guided learning tours.""" tours = [] # Tour 1: Entry Points tours.append({ "id": "tour:entry-points", "name": "入口点导览", "description": "从项目入口开始,了解如何启动和使用AgentKit", "steps": [ {"nodeId": "file:src/agentkit/__main__.py", "why": "Python模块入口,python -m agentkit"}, {"nodeId": "file:src/agentkit/__init__.py", "why": "包入口,导出核心公共API"}, {"nodeId": "file:src/agentkit/cli/main.py", "why": "CLI主入口,Typer应用定义"}, {"nodeId": "file:src/agentkit/server/app.py", "why": "HTTP服务入口,FastAPI应用创建"}, ], }) # Tour 2: Core Agent Lifecycle tours.append({ "id": "tour:agent-lifecycle", "name": "Agent生命周期导览", "description": "深入理解Agent从创建到执行任务的完整生命周期", "steps": [ {"nodeId": "class:BaseAgent", "why": "Agent基类,定义标准生命周期和可插拔能力"}, {"nodeId": "func:BaseAgent.start", "why": "Agent启动流程:连接Redis→注册→心跳→监听"}, {"nodeId": "func:BaseAgent.execute", "why": "任务执行框架方法:on_task_start→handle_task→quality_gate→on_task_complete"}, {"nodeId": "func:BaseAgent.handle_task", "why": "抽象方法,子类实现业务逻辑"}, {"nodeId": "class:ConfigDrivenAgent", "why": "配置驱动Agent,从YAML自动组装"}, {"nodeId": "func:ConfigDrivenAgent.handle_task", "why": "根据execution_mode路由到react/direct/custom模式"}, {"nodeId": "class:AgentConfig", "why": "Agent配置模型,支持YAML/Dict构建"}, ], }) # Tour 3: ReAct Engine tours.append({ "id": "tour:react-engine", "name": "ReAct引擎导览", "description": "理解ReAct推理-行动循环的核心实现", "steps": [ {"nodeId": "class:ReActEngine", "why": "ReAct引擎核心,Think→Act→Observe循环"}, {"nodeId": "func:ReActEngine.execute", "why": "执行ReAct循环,支持超时和取消"}, {"nodeId": "func:ReActEngine.execute_stream", "why": "流式执行,逐步yield事件"}, {"nodeId": "func:ReActEngine._execute_tool", "why": "工具调用执行,处理成功和失败"}, {"nodeId": "func:ReActEngine._parse_text_tool_calls", "why": "文本解析模式,支持Action和代码块格式"}, {"nodeId": "class:ReActStep", "why": "单步记录数据结构"}, {"nodeId": "class:ReActResult", "why": "ReAct执行结果数据结构"}, {"nodeId": "class:ReActEvent", "why": "流式执行事件数据结构"}, ], }) # Tour 4: LLM Gateway tours.append({ "id": "tour:llm-gateway", "name": "LLM网关导览", "description": "了解多Provider统一网关的设计和实现", "steps": [ {"nodeId": "class:LLMGateway", "why": "LLM网关核心,统一多Provider调用接口"}, {"nodeId": "file:src/agentkit/llm/protocol.py", "why": "LLM协议定义,LLMProvider/LLMRequest/LLMResponse"}, {"nodeId": "file:src/agentkit/llm/config.py", "why": "模型别名和Provider配置"}, {"nodeId": "file:src/agentkit/llm/providers/openai.py", "why": "OpenAI Provider实现"}, {"nodeId": "file:src/agentkit/llm/providers/anthropic.py", "why": "Anthropic Provider实现"}, {"nodeId": "file:src/agentkit/llm/retry.py", "why": "LLM重试策略"}, ], }) # Tour 5: Memory System tours.append({ "id": "tour:memory-system", "name": "记忆系统导览", "description": "理解多层记忆系统的架构和实现", "steps": [ {"nodeId": "file:src/agentkit/memory/base.py", "why": "记忆基类接口定义"}, {"nodeId": "file:src/agentkit/memory/retriever.py", "why": "统一记忆检索器,整合工作/情景/语义记忆"}, {"nodeId": "file:src/agentkit/memory/working.py", "why": "工作记忆 - 基于Redis的短期记忆"}, {"nodeId": "file:src/agentkit/memory/episodic.py", "why": "情景记忆 - 基于向量的长期记忆"}, {"nodeId": "file:src/agentkit/memory/semantic.py", "why": "语义记忆 - RAG服务集成"}, {"nodeId": "file:src/agentkit/memory/embedder.py", "why": "文本向量化嵌入器"}, ], }) # Tour 6: Orchestration tours.append({ "id": "tour:orchestration", "name": "编排系统导览", "description": "了解多Agent协作编排和Pipeline引擎", "steps": [ {"nodeId": "class:Orchestrator", "why": "多Agent协作编排器,Orchestrator-Worker模式"}, {"nodeId": "func:Orchestrator.execute", "why": "编排执行:分解→执行→汇总"}, {"nodeId": "func:Orchestrator.execute_adaptive", "why": "自适应编排:执行→评估→再分解循环"}, {"nodeId": "file:src/agentkit/orchestrator/pipeline_engine.py", "why": "Pipeline引擎,执行DAG工作流"}, {"nodeId": "file:src/agentkit/orchestrator/pipeline_schema.py", "why": "Pipeline配置模型"}, {"nodeId": "file:src/agentkit/orchestrator/reflection.py", "why": "执行后反思模块"}, ], }) # Tour 7: Skills & Router tours.append({ "id": "tour:skills-router", "name": "技能与路由导览", "description": "了解技能定义、注册和意图路由机制", "steps": [ {"nodeId": "file:src/agentkit/skills/base.py", "why": "技能基类和配置定义"}, {"nodeId": "class:SkillRegistry", "why": "技能注册中心"}, {"nodeId": "file:src/agentkit/skills/loader.py", "why": "从YAML加载技能定义"}, {"nodeId": "class:IntentRouter", "why": "意图路由器,匹配用户输入到技能"}, {"nodeId": "file:src/agentkit/router/intent.py", "why": "意图路由实现"}, ], }) # Tour 8: Evolution tours.append({ "id": "tour:evolution", "name": "进化系统导览", "description": "了解Agent自我进化的机制和实现", "steps": [ {"nodeId": "file:src/agentkit/evolution/lifecycle.py", "why": "进化生命周期Mixin"}, {"nodeId": "file:src/agentkit/evolution/reflector.py", "why": "反思器 - 分析结果生成改进建议"}, {"nodeId": "file:src/agentkit/evolution/prompt_optimizer.py", "why": "Prompt自动优化"}, {"nodeId": "file:src/agentkit/evolution/genetic.py", "why": "遗传算法进化"}, {"nodeId": "file:src/agentkit/evolution/ab_tester.py", "why": "A/B测试对比"}, ], }) # Tour 9: Infrastructure tours.append({ "id": "tour:infrastructure", "name": "基础设施导览", "description": "了解消息总线、会话管理、遥测等基础设施", "steps": [ {"nodeId": "file:src/agentkit/bus/protocol.py", "why": "消息总线协议接口"}, {"nodeId": "file:src/agentkit/bus/redis_bus.py", "why": "Redis Pub/Sub消息总线"}, {"nodeId": "file:src/agentkit/bus/memory_bus.py", "why": "进程内消息总线"}, {"nodeId": "file:src/agentkit/session/manager.py", "why": "会话管理器"}, {"nodeId": "file:src/agentkit/telemetry/tracing.py", "why": "OpenTelemetry追踪集成"}, {"nodeId": "file:src/agentkit/telemetry/metrics.py", "why": "运行指标收集"}, ], }) return tours def main(): """Main entry point.""" print("Building knowledge graph for Fischer AgentKit...") graph = build_knowledge_graph() # Ensure output directory exists OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) # Write JSON with open(OUTPUT_PATH, "w", encoding="utf-8") as f: json.dump(graph, f, ensure_ascii=False, indent=2) print(f"Knowledge graph written to {OUTPUT_PATH}") print(f" Nodes: {len(graph['nodes'])}") print(f" Edges: {len(graph['edges'])}") print(f" Tours: {len(graph['tours'])}") # Print layer statistics layer_counts = {} for node in graph["nodes"]: layer = node["layer"] layer_counts[layer] = layer_counts.get(layer, 0) + 1 print("\nLayer distribution:") for layer, count in sorted(layer_counts.items()): print(f" {layer}: {count} nodes") # Print type statistics type_counts = {} for node in graph["nodes"]: t = node["type"] type_counts[t] = type_counts.get(t, 0) + 1 print("\nNode type distribution:") for t, count in sorted(type_counts.items()): print(f" {t}: {count} nodes") if __name__ == "__main__": main()