fischer-agentkit/.understand-anything/build_kg.py

#!/usr/bin/env python3
"""Knowledge Graph Builder for Fischer AgentKit

Scans all Python source files under src/agentkit/ and configs/,
extracts classes, functions, imports, and builds a comprehensive
knowledge graph JSON file.
"""

import ast
import json
import os
import sys
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# Project root.
# Derived from this script's own location instead of a machine-specific
# absolute path, so the builder runs from any checkout.
# Assumes the script lives in <project root>/.understand-anything/ (the same
# directory OUTPUT_PATH points into).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
OUTPUT_PATH = PROJECT_ROOT / ".understand-anything" / "knowledge-graph.json"

# Directories to scan (recursively, for *.py files)
SCAN_DIRS = [
    PROJECT_ROOT / "src" / "agentkit",
    PROJECT_ROOT / "configs",
]

# Architecture layer mapping.
# Declared grouped by layer for readability, then flattened into the
# "path component -> layer" lookup table.
_PACKAGES_BY_LAYER = {
    "api": ("server", "cli"),
    "service": ("core", "orchestrator", "skills", "router"),
    "data": ("memory", "session", "bus"),
    "utility": (
        "llm",
        "mcp",
        "tools",
        "telemetry",
        "prompts",
        "quality",
        "evaluation",
        "evolution",
        "configs",
    ),
}
LAYER_MAP = {
    package: layer
    for layer, packages in _PACKAGES_BY_LAYER.items()
    for package in packages
}

# Chinese one-line summaries for packages and modules.
# Keys are dotted module names relative to the agentkit package (entries for
# the configs directory carry a "configs" prefix); values are the summary text
# that get_file_summary() falls back to when a file has no module docstring.
MODULE_SUMMARIES = {
    # --- core: agent base classes, protocol, reasoning engines, planning ---
    "core": "核心模块 - 定义Agent基类、通信协议、ReAct引擎、任务分发、注册中心等基础组件",
    "core.base": "Agent基类 - 统一Agent生命周期管理，包括启动、停止、任务执行、Handoff、进度上报",
    "core.protocol": "通信协议定义 - 统一消息格式，包括TaskMessage、TaskResult、TaskProgress、HandoffMessage等",
    "core.react": "ReAct推理-行动循环引擎 - 实现Think→Act→Observe循环，支持工具调用和文本解析模式",
    "core.exceptions": "自定义异常体系 - 定义Agent框架所有异常类型",
    "core.dispatcher": "任务分发器 - 通过Redis Queue将任务分发给Agent，支持回调、重试、进度上报",
    "core.registry": "Agent注册中心 - 管理Agent的注册、发现、状态、心跳和负载均衡",
    "core.config_driven": "配置驱动Agent - 从YAML/Dict配置自动组装Agent，支持llm_generate/tool_call/custom三种模式",
    "core.compressor": "上下文压缩器 - 长会话自动压缩历史消息，支持LLM摘要和简单截断策略",
    "core.trace": "执行轨迹记录器 - 记录ReAct执行过程中的完整轨迹，为反思和可观测性提供数据",
    "core.shared_workspace": "共享工作空间 - 基于Redis的Agent间共享状态存储，支持读写、锁操作",
    "core.agent_pool": "Agent实例池 - 运行时管理Agent的创建、获取、删除",
    "core.orchestrator": "多Agent协作编排器 - 实现Orchestrator-Worker模式，支持任务分解、并行执行、自适应编排",
    "core.headroom_compressor": "Headroom AI压缩器 - 基于Headroom AI的上下文压缩实现",
    "core.logging": "日志配置 - 统一日志格式和配置",
    "core.standalone": "独立运行模式 - 支持Agent脱离框架独立运行",
    "core.goal_planner": "目标规划器 - 将复杂目标分解为可执行步骤",
    "core.plan_checker": "计划检查器 - 验证执行计划的完整性和可行性",
    "core.plan_exec_engine": "计划执行引擎 - 执行分解后的计划步骤",
    "core.plan_executor": "计划执行器 - 管理计划执行的完整流程",
    "core.plan_schema": "计划Schema - 执行计划的数据结构定义",
    "core.reflexion": "Reflexion引擎 - 自反思推理，通过自我评估改进输出",
    "core.rewoo": "ReWOO引擎 - 无观察推理，先规划后执行的高效模式",

    # --- llm: multi-provider gateway, providers, caching ---
    "llm": "LLM网关模块 - 多Provider统一网关，支持OpenAI/Anthropic/Gemini/文心/豆包/元宝等",
    "llm.gateway": "LLM网关 - 统一多Provider调用接口，支持路由、重试、流式输出",
    "llm.protocol": "LLM协议定义 - 定义LLMProvider、LLMRequest、LLMResponse等接口",
    "llm.config": "LLM配置 - 模型别名、Provider配置管理",
    "llm.retry": "LLM重试策略 - 指数退避重试和错误处理",
    "llm.providers": "LLM Provider实现 - 各大模型服务商的具体适配器",
    "llm.providers.openai": "OpenAI Provider - 支持GPT-4/GPT-3.5等模型",
    "llm.providers.anthropic": "Anthropic Provider - 支持Claude系列模型",
    "llm.providers.gemini": "Gemini Provider - 支持Google Gemini模型",
    "llm.providers.wenxin": "文心一言Provider - 支持百度文心大模型",
    "llm.providers.doubao": "豆包Provider - 支持字节豆包大模型",
    "llm.providers.yuanbao": "元宝Provider - 支持腾讯元宝大模型",
    "llm.providers.tracker": "LLM调用追踪器 - 记录和统计LLM调用",
    "llm.providers.usage_store": "LLM用量存储 - Token用量和成本追踪，支持InMemory和Redis后端",
    "llm.cache": "LLM响应缓存 - 基于语义相似度的LLM响应缓存，减少重复调用",
    "llm.cache_key": "缓存键生成 - LLM缓存键的计算和归一化",

    # --- chat: intent / semantic routing for chat ---
    "chat": "聊天路由模块 - CostAwareRouter三层意图路由和语义路由",
    "chat.skill_routing": "三层意图路由 - CostAwareRouter，正则→启发式→LLM分类逐层升级",
    "chat.semantic_router": "语义路由 - 基于向量相似度的意图路由，支持语义匹配",

    # --- quality (part 1): cascade detection and alignment; the remaining
    # quality.* entries are listed further down in this table ---
    "quality.cascade_detector": "级联检测器 - 检测Agent输出中的级联失败模式",
    "quality.cascade_state_store": "级联状态存储 - 级联检测状态持久化，支持InMemory和Redis后端",
    "quality.alignment": "对齐守卫 - 检测和修正Agent输出中的对齐偏差",

    # --- tools: tools callable by agents ---
    "tools": "工具模块 - 提供Agent可调用的各类工具",
    "tools.base": "工具基类 - 定义Tool接口和标准执行流程",
    "tools.registry": "工具注册中心 - 管理工具的注册、发现、获取",
    "tools.shell": "Shell工具 - 执行系统命令",
    "tools.web_search": "Web搜索工具 - 执行网络搜索",
    "tools.web_crawl": "Web爬取工具 - 爬取网页内容",
    "tools.memory_tool": "记忆工具 - Agent记忆读写操作",
    "tools.ask_human": "人工介入工具 - 请求人类输入",
    "tools.schema_tools": "Schema工具 - JSON Schema相关操作",
    "tools.function_tool": "函数工具 - 将Python函数包装为Tool",
    "tools.agent_tool": "Agent工具 - 将Agent包装为可调用Tool",
    "tools.mcp_tool": "MCP工具 - MCP协议工具适配器",
    "tools.composition": "工具组合 - 支持工具链式组合",
    "tools.baidu_search": "百度搜索工具 - 百度搜索引擎集成",
    "tools.headroom_retrieve": "Headroom检索工具 - Headroom AI知识检索",
    "tools.computer_use": "计算机使用工具 - 桌面操控工具，支持截图、点击、输入等操作",
    "tools.computer_use_session": "计算机使用会话 - 桌面操控会话管理，支持云端和本地(pyautogui)模式",
    "tools.computer_use_recorder": "计算机使用录制器 - 记录桌面操控动作序列",
    "tools.pty_session": "PTY会话 - 伪终端会话管理",
    "tools.terminal_session": "终端会话 - 终端模拟器会话",
    "tools.output_parser": "输出解析器 - 解析Agent输出为结构化数据",
    "tools.skill_install": "技能安装器 - 动态安装技能包",

    # --- memory: working / episodic / semantic memory and retrieval ---
    "memory": "记忆模块 - 多层记忆系统，支持工作记忆、情景记忆、语义记忆",
    "memory.base": "记忆基类 - 定义Memory接口",
    "memory.working": "工作记忆 - 基于Redis的短期工作记忆",
    "memory.episodic": "情景记忆 - 基于向量数据库的长期情景记忆",
    "memory.semantic": "语义记忆 - 基于RAG服务的语义知识检索",
    "memory.profile": "用户画像 - 用户偏好和历史信息管理",
    "memory.retriever": "记忆检索器 - 统一多层记忆检索接口",
    "memory.embedder": "嵌入器 - 文本向量化，支持OpenAI Embedding",
    "memory.models": "记忆数据模型 - Pydantic模型定义",
    "memory.rag_loop": "RAG循环 - 检索增强生成的迭代循环",
    "memory.query_transformer": "查询转换器 - 优化检索查询",
    "memory.relevance_scorer": "相关性评分器 - 评估检索结果相关性",
    "memory.contextual_retrieval": "上下文检索 - 基于上下文的检索增强",
    "memory.http_rag": "HTTP RAG服务 - 远程RAG API客户端",

    # --- skills: reusable agent skills ---
    "skills": "技能模块 - 定义可复用的Agent技能，包含意图、工具和质量门控",
    "skills.base": "技能基类 - 定义Skill、SkillConfig、IntentConfig等",
    "skills.registry": "技能注册中心 - 管理技能的注册、发现、获取",
    "skills.loader": "技能加载器 - 从YAML配置加载技能定义",
    "skills.pipeline": "技能Pipeline - 技能编排流程",
    "skills.skill_md": "Markdown技能 - 从Markdown文档生成技能",
    "skills.geo_pipeline": "GEO Pipeline - 地理信息处理Pipeline",

    # --- orchestrator: pipeline / DAG workflow engine ---
    "orchestrator": "编排模块 - Pipeline编排引擎，支持DAG工作流",
    "orchestrator.pipeline_engine": "Pipeline引擎 - 执行DAG定义的工作流",
    "orchestrator.pipeline_schema": "Pipeline Schema - Pipeline配置模型定义",
    "orchestrator.pipeline_state": "Pipeline状态 - Pipeline执行状态管理",
    "orchestrator.pipeline_models": "Pipeline模型 - Pipeline数据模型",
    "orchestrator.pipeline_loader": "Pipeline加载器 - 从YAML加载Pipeline定义",
    "orchestrator.reflection": "反思模块 - 执行后反思和改进",
    "orchestrator.retry": "重试策略 - Pipeline步骤重试机制",
    "orchestrator.compensation": "补偿机制 - Pipeline失败时的补偿操作",
    "orchestrator.handoff": "Handoff - Agent间任务转交",
    "orchestrator.dynamic_pipeline": "动态Pipeline - 运行时动态构建Pipeline",

    # --- router: intent routing ---
    "router": "路由模块 - 意图路由，将用户输入匹配到对应技能",
    "router.intent": "意图路由器 - 基于LLM的意图识别和路由",

    # --- quality (part 2): quality gate and output normalisation ---
    "quality": "质量模块 - 输出质量门控和标准化",
    "quality.gate": "质量门控 - 检查Agent输出是否满足质量要求",
    "quality.output": "输出标准化 - 统一Agent输出格式",

    # --- prompts: prompt templates ---
    "prompts": "Prompt模块 - Prompt模板和渲染",
    "prompts.template": "Prompt模板 - 支持变量替换和Section组合",
    "prompts.section": "Prompt Section - 定义Prompt的各组成部分",

    # --- bus: message bus between agents ---
    "bus": "消息总线模块 - Agent间异步通信",
    "bus.protocol": "总线协议 - 定义消息总线接口",
    "bus.message": "消息定义 - Agent间通信消息格式",
    "bus.memory_bus": "内存消息总线 - 基于进程内队列的消息总线",
    "bus.redis_bus": "Redis消息总线 - 基于Redis Pub/Sub的消息总线",

    # --- session: session management and persistence ---
    "session": "会话模块 - 会话管理和持久化",
    "session.manager": "会话管理器 - 管理对话会话的创建、获取、更新",
    "session.store": "会话存储 - 会话数据的持久化存储",
    "session.models": "会话模型 - 会话相关的数据模型",

    # --- server: FastAPI HTTP/WebSocket service and its routes ---
    "server": "服务器模块 - FastAPI HTTP/WebSocket服务",
    "server.app": "FastAPI应用 - 创建和配置FastAPI应用实例",
    "server.config": "服务器配置 - 服务器运行参数配置",
    "server.runner": "服务器运行器 - 启动和管理服务器进程",
    "server.middleware": "中间件 - 请求处理中间件",
    "server.client": "API客户端 - 服务端API客户端封装",
    "server.client_config": "客户端配置 - API客户端配置管理",
    "server.task_store": "任务存储 - 服务端任务状态存储",
    "server.routes": "路由模块 - HTTP/WebSocket路由定义",
    "server.routes.chat": "聊天路由 - 对话API端点",
    "server.routes.ws": "WebSocket路由 - 实时通信端点",
    "server.routes.tasks": "任务路由 - 任务管理API",
    "server.routes.agents": "Agent路由 - Agent管理API",
    "server.routes.skills": "技能路由 - 技能管理API，含@-mention建议端点",
    "server.routes.memory": "记忆路由 - 记忆管理API",
    "server.routes.llm": "LLM路由 - LLM配置和调用API",
    "server.routes.health": "健康检查路由 - 服务健康状态端点",
    "server.routes.metrics": "指标路由 - 运行指标API",
    "server.routes.evolution": "进化路由 - Agent进化管理API",
    "server.routes.evolution_dashboard": "进化仪表盘路由 - 进化数据可视化API",
    "server.routes.kb_management": "知识库管理路由 - 文档上传/搜索/源配置API",
    "server.routes.settings": "设置路由 - 系统配置管理API",
    "server.routes.terminal": "终端路由 - PTY终端会话API",
    "server.routes.workflows": "工作流路由 - Pipeline工作流管理API",
    "server.routes.skill_management": "技能管理路由 - 技能CRUD操作API",
    "server.routes.portal": "门户路由 - Web GUI入口和静态资源",

    # --- cli: command-line interface ---
    "cli": "命令行模块 - CLI工具",
    "cli.main": "CLI入口 - Typer应用主入口",
    "cli.chat": "聊天命令 - 交互式对话命令",
    "cli.init": "初始化命令 - 项目初始化",
    "cli.onboarding": "引导命令 - 新用户引导流程",
    "cli.skill": "技能命令 - 技能管理CLI",
    "cli.task": "任务命令 - 任务提交和管理CLI",
    "cli.pair": "配对命令 - Agent配对",
    "cli.usage": "使用统计命令 - 使用情况统计",
    "cli.templates": "模板命令 - Agent模板管理",

    # --- mcp: Model Context Protocol integration ---
    "mcp": "MCP协议模块 - Model Context Protocol集成",
    "mcp.client": "MCP客户端 - 连接MCP服务器",
    "mcp.server": "MCP服务器 - 提供MCP服务",
    "mcp.manager": "MCP管理器 - 管理MCP连接",
    "mcp.transport": "MCP传输层 - MCP通信传输实现",

    # --- telemetry: tracing and metrics ---
    "telemetry": "遥测模块 - 可观测性支持",
    "telemetry.tracing": "分布式追踪 - OpenTelemetry追踪集成",
    "telemetry.metrics": "指标收集 - 运行指标收集和导出",
    "telemetry.setup": "遥测设置 - 初始化遥测组件",

    # --- evolution: agent self-evolution ---
    "evolution": "进化模块 - Agent自我进化能力",
    "evolution.lifecycle": "进化生命周期 - EvolutionMixin，任务后触发进化",
    "evolution.reflector": "反思器 - 分析任务执行结果，生成改进建议",
    "evolution.llm_reflector": "LLM反思器 - 使用LLM进行深度反思",
    "evolution.prompt_optimizer": "Prompt优化器 - 自动优化Agent Prompt",
    "evolution.strategy_tuner": "策略调优器 - 调整Agent执行策略",
    "evolution.genetic": "遗传算法 - 基于遗传算法的Prompt进化",
    "evolution.fitness": "适应度评估 - 评估进化变体的质量",
    "evolution.ab_tester": "A/B测试 - 对比测试不同进化变体",
    "evolution.evolution_store": "进化存储 - 持久化进化历史",
    "evolution.models": "进化模型 - 进化相关数据模型",
    "evolution.experience_schema": "经验Schema - 经验数据结构定义",
    "evolution.experience_store": "经验存储 - 成功/失败经验持久化",
    "evolution.path_optimizer": "路径优化器 - 分析工具调用路径，推荐更优策略",
    "evolution.pitfall_detector": "陷阱检测器 - 检测重复错误模式",

    # --- evaluation: output quality evaluation ---
    "evaluation": "评估模块 - Agent输出质量评估",
    "evaluation.ragas_evaluator": "RAGAS评估器 - 使用RAGAS框架评估RAG质量",

    # --- org: multi-agent organisation discovery ---
    "org": "组织发现模块 - 多Agent组织架构和协作发现",
    "org.context": "组织上下文 - 组织级别的共享上下文管理",
    "org.discovery": "组织发现 - Agent间能力发现和协作匹配",

    # --- marketplace: auctions and wealth allocation between agents ---
    "marketplace": "多Agent市场模块 - Agent间的拍卖和财富分配",
    "marketplace.auction": "拍卖机制 - Agent间的任务拍卖和竞价",
    "marketplace.wealth": "财富管理 - Agent间的价值交换和分配",

    # --- configs: files under the top-level configs/ directory ---
    "configs": "配置模块 - Pipeline和技能YAML配置",
    "configs.geo_server": "GEO服务器 - 地理信息HTTP服务",
    "configs.geo_handlers": "GEO处理器 - 地理信息请求处理",
    "configs.geo_tools": "GEO工具 - 地理信息相关工具定义",
}


def get_layer(file_path: str) -> str:
    """Determine the architecture layer of a project-relative file path.

    Args:
        file_path: Path relative to the project root. Forward slashes and
            backslashes are both accepted as separators.

    Returns:
        ``"utility"`` for anything below a ``configs`` directory,
        ``"service"`` for the root package's ``__init__.py`` / ``__main__.py``,
        otherwise the LAYER_MAP value of the first path component listed
        there, or ``"unknown"`` when no component matches.
    """
    parts = file_path.replace("\\", "/").split("/")
    # Everything under configs/ is utility, regardless of deeper components.
    if "configs" in parts:
        return "utility"
    # Root package entry files only: "src/agentkit/__init__.py" has exactly
    # three components.  A larger limit would also swallow sub-package
    # __init__.py files (e.g. src/agentkit/llm/__init__.py) and label them
    # "service" instead of their own package's layer.
    if parts[-1] in ("__init__.py", "__main__.py") and len(parts) <= 3:
        return "service"
    for part in parts:
        if part in LAYER_MAP:
            return LAYER_MAP[part]
    return "unknown"


def get_module_key(file_path: str) -> str:
    """Turn a project-relative file path into a module key for summary lookup.

    The ``src/agentkit/`` or ``configs/`` prefix is dropped, a trailing
    ``/__init__.py`` collapses to the package itself, and the ``.py``
    extension is removed.  Components stay separated by ``/``.

    Args:
        file_path: Path relative to the project root. Backslash separators
            are normalised to ``/`` first.

    Returns:
        The key, e.g. ``"core/base"`` for ``src/agentkit/core/base.py`` and
        ``"llm"`` for ``src/agentkit/llm/__init__.py``.
    """
    rel = file_path.replace("\\", "/")
    for prefix in ("src/agentkit/", "configs/"):
        if rel.startswith(prefix):
            rel = rel[len(prefix):]
            break

    # Strip suffixes only: a blanket str.replace(".py", "") would also eat
    # ".py" occurring in the middle of a file or directory name.
    rel = rel.removesuffix("/__init__.py")
    rel = rel.removesuffix(".py")
    return rel


def get_file_summary(file_path: str, docstring: str = "") -> str:
    """Return a one-line summary for a file.

    Resolution order:
      1. the first non-empty line of ``docstring``, if any;
      2. the MODULE_SUMMARIES entry for the module itself;
      3. the MODULE_SUMMARIES entry of the nearest parent package;
      4. a generic ``"模块 <key>"`` placeholder.

    Args:
        file_path: Path relative to the project root.
        docstring: Module docstring of the file, or an empty string.

    Returns:
        The summary text.
    """
    if docstring:
        first_line = docstring.strip().split("\n")[0].strip()
        if first_line:
            return first_line

    normalized = file_path.replace("\\", "/")
    key = get_module_key(normalized)

    # MODULE_SUMMARIES is keyed by dotted names ("core.base") while
    # get_module_key() yields "/"-separated keys ("core/base"); convert
    # before looking anything up, otherwise nested modules never match.
    dotted = key.replace("/", ".")
    if normalized.startswith("configs/"):
        # get_module_key() drops the "configs/" prefix, but the table keeps
        # it ("configs.geo_server").
        dotted = f"configs.{dotted}"

    # Walk from the module itself up through its parent packages.
    parts = dotted.split(".")
    for end in range(len(parts), 0, -1):
        summary = MODULE_SUMMARIES.get(".".join(parts[:end]))
        if summary is not None:
            return summary
    return f"模块 {key}"


def estimate_complexity(node: ast.AST) -> str:
    """Classify an AST node as "simple", "moderate" or "complex".

    Functions are scored as 1 plus the number of ``if`` / ``while`` / ``for``
    / ``except`` nodes and ``and`` / ``or`` operators found anywhere inside
    them; classes are scored by the number of methods defined directly in
    their body.  Scores up to 3 are "simple", up to 8 "moderate", anything
    above is "complex".  Every other node type is "simple".
    """

    def bucket(score: int) -> str:
        # Shared thresholds for both the function and the class metric.
        if score > 8:
            return "complex"
        if score > 3:
            return "moderate"
        return "simple"

    callable_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    if isinstance(node, callable_types):
        decision_types = (
            ast.If, ast.While, ast.For, ast.ExceptHandler, ast.And, ast.Or,
        )
        decisions = sum(
            1 for inner in ast.walk(node) if isinstance(inner, decision_types)
        )
        return bucket(1 + decisions)

    if isinstance(node, ast.ClassDef):
        method_total = sum(
            1 for member in node.body if isinstance(member, callable_types)
        )
        return bucket(method_total)

    return "simple"


def extract_class_info(node: ast.ClassDef, file_path: str) -> dict:
    """Extract class information from an AST class definition.

    Args:
        node: The class definition to describe.
        file_path: Path of the file containing the class (currently unused).

    Returns:
        dict with keys:
          * ``name``: class name,
          * ``base_classes``: base class names as written in the source;
            only plain names (``Base``) and dotted names (``abc.ABC``) are
            recorded, other base expressions are skipped,
          * ``methods``: one dict per method defined directly in the class
            body (``name``, ``params`` without ``self``, ``is_async``),
          * ``complexity``: result of estimate_complexity() for the class,
          * ``docstring``: class docstring, or ``""``.
    """
    base_classes = []
    for base in node.bases:
        if isinstance(base, ast.Name):
            base_classes.append(base.id)
        elif isinstance(base, ast.Attribute):
            # Record the dotted name as source text ("abc.ABC").  ast.dump()
            # would put the raw AST repr into the human-readable summaries.
            base_classes.append(ast.unparse(base))

    methods = [
        {
            "name": item.name,
            "params": [arg.arg for arg in item.args.args if arg.arg != "self"],
            "is_async": isinstance(item, ast.AsyncFunctionDef),
        }
        for item in node.body
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]

    return {
        "name": node.name,
        "base_classes": base_classes,
        "methods": methods,
        "complexity": estimate_complexity(node),
        "docstring": ast.get_docstring(node) or "",
    }


def extract_function_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> dict:
    """Extract function information from AST node."""
    params = [arg.arg for arg in node.args.args]

    return_type = ""
    if node.returns:
        if isinstance(node.returns, ast.Name):
            return_type = node.returns.id
        elif isinstance(node.returns, ast.Constant):
            return_type = str(node.returns.value)
        else:
            return_type = ast.dump(node.returns)

    return {
        "name": node.name,
        "params": params,
        "return_type": return_type,
        "is_async": isinstance(node, ast.AsyncFunctionDef),
        "complexity": estimate_complexity(node),
    }


def extract_imports(tree: ast.AST, file_path: str) -> list[dict]:
    """Extract project-internal imports from an AST.

    Only absolute imports whose top-level package is exactly ``agentkit`` or
    ``configs`` are reported.  Third-party packages that merely share the
    prefix (``agentkitty``, ``configsx``) and relative imports are ignored.

    Args:
        tree: Parsed module (or any AST subtree) to search.
        file_path: Path of the file being analysed (currently unused).

    Returns:
        One dict per imported name, in the order ``ast.walk`` visits the
        import statements:
          * ``from x import y``  -> ``{"from_module": "x", "import_name": "y"}``
          * ``import x``         -> ``{"from_module": None, "import_name": "x"}``
    """
    internal_roots = ("agentkit", "configs")

    def is_internal(module_name: str) -> bool:
        # Compare the first dotted component so that e.g. "agentkit_extra"
        # is not mistaken for the "agentkit" package.
        return module_name.split(".", 1)[0] in internal_roots

    imports = []
    for node in ast.walk(tree):
        if isinstance(node, ast.ImportFrom):
            # level > 0 marks a relative import; its module name is relative
            # to the importing package and must not be read as absolute.
            if node.module and node.level == 0 and is_internal(node.module):
                imports.extend(
                    {"from_module": node.module, "import_name": alias.name}
                    for alias in node.names
                )
        elif isinstance(node, ast.Import):
            imports.extend(
                {"from_module": None, "import_name": alias.name}
                for alias in node.names
                if is_internal(alias.name)
            )
    return imports


def module_to_file_path(module: str) -> str:
    """Convert Python module path to file path."""
    parts = module.split(".")

    # Handle agentkit modules
    if module.startswith("agentkit"):
        # Skip "agentkit" prefix, it's under src/
        sub_parts = parts[1:]  # skip "agentkit"
        if not sub_parts:
            return "src/agentkit/__init__.py"
        # Try as package __init__.py
        init_path = PROJECT_ROOT / "src" / "agentkit" / "/".join(sub_parts) / "__init__.py"
        if init_path.exists():
            return f"src/agentkit/{'/'.join(sub_parts)}/__init__.py"
        # Try as module.py
        mod_path = PROJECT_ROOT / "src" / "agentkit" / ("/".join(sub_parts) + ".py")
        if mod_path.exists():
            return f"src/agentkit/{'/'.join(sub_parts)}.py"

    # Handle configs modules
    if module.startswith("configs"):
        sub_parts = parts[1:]  # skip "configs"
        if not sub_parts:
            return "configs/__init__.py"
        mod_path = PROJECT_ROOT / "configs" / ("/".join(sub_parts) + ".py")
        if mod_path.exists():
            return f"configs/{'/'.join(sub_parts)}.py"

    return ""


def scan_file(file_path: Path) -> dict:
    """Scan a single Python file and extract all information.

    Args:
        file_path: Path of a file located under PROJECT_ROOT.

    Returns:
        dict with keys ``classes``, ``functions`` (top-level functions),
        ``imports``, ``rel_path`` ("/"-separated path relative to
        PROJECT_ROOT) and ``docstring``.  When the file cannot be read as
        UTF-8 or parsed, the collections are empty, ``docstring`` is ``""``
        and an extra empty ``top_level_functions`` list is present (kept for
        compatibility with the earlier error result).

    Raises:
        ValueError: If ``file_path`` is not inside PROJECT_ROOT.
    """
    # Resolve the relative path first so that the failure result carries it
    # too; callers index scan results by "rel_path".
    rel_path = file_path.relative_to(PROJECT_ROOT).as_posix()

    try:
        source = file_path.read_text(encoding="utf-8")
        tree = ast.parse(source)
    except (SyntaxError, UnicodeDecodeError, ValueError) as exc:
        # Best effort: report the file and carry on with an empty result
        # instead of aborting the whole scan.
        print(f"Skipping unparsable file {rel_path}: {exc}", file=sys.stderr)
        return {
            "classes": [],
            "functions": [],
            "imports": [],
            "top_level_functions": [],
            "rel_path": rel_path,
            "docstring": "",
        }

    classes = []
    top_level_functions = []
    # Only direct children of the module: nested classes/functions are not
    # reported as separate entries.
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.ClassDef):
            classes.append(extract_class_info(node, rel_path))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            top_level_functions.append(extract_function_info(node))

    return {
        "classes": classes,
        "functions": top_level_functions,
        "imports": extract_imports(tree, rel_path),
        "rel_path": rel_path,
        "docstring": ast.get_docstring(tree) or "",
    }


def build_knowledge_graph():
    """Build the complete knowledge graph.

    Scans every ``*.py`` file below SCAN_DIRS and produces:

      * one ``file`` node per scanned file,
      * one ``class`` node per top-level class and one ``function`` node per
        method defined directly in it,
      * one ``function`` node per top-level function,
      * ``contains``, ``extends`` and ``imports`` edges between those nodes,
      * the guided tours returned by build_tours().

    Returns:
        dict: JSON-serialisable graph with the keys ``version``, ``project``,
        ``nodes``, ``edges`` and ``tours``.

    Note:
        Class and function node ids are derived from bare names
        (``class:<Name>``, ``func:<name>``), so same-named definitions in
        different files end up with the same id.
    """
    # Collect all Python files
    py_files = [
        py_file
        for scan_dir in SCAN_DIRS
        if scan_dir.exists()
        for py_file in scan_dir.rglob("*.py")
    ]

    print(f"Found {len(py_files)} Python files to scan")

    # Scan all files (sorted so that node order is stable between runs)
    file_data = {}
    for py_file in sorted(py_files):
        data = scan_file(py_file)
        # A scan result for an unreadable file may lack "rel_path"; derive it
        # here so that a single bad file cannot abort the whole build.
        rel_path = data.get("rel_path") or py_file.relative_to(PROJECT_ROOT).as_posix()
        file_data[rel_path] = data

    # Build nodes and edges
    nodes = []
    edges = []

    def add_edge(source: str, target: str, kind: str, label: str) -> None:
        """Append one edge with a fresh random id."""
        edges.append({
            "id": f"edge:{uuid.uuid4().hex[:8]}",
            "source": source,
            "target": target,
            "type": kind,
            "label": label,
        })

    # Track all node IDs for edge building
    file_node_ids = {}
    class_node_ids = {}
    func_node_ids = {}

    # 1. Create file nodes
    for rel_path, data in file_data.items():
        node_id = f"file:{rel_path}"
        path_parts = rel_path.replace("\\", "/").split("/")
        # Tags are the directory components, minus the common src/agentkit prefix.
        tags = [
            part
            for part in path_parts
            if part not in ("src", "agentkit", "__init__.py") and not part.endswith(".py")
        ]

        nodes.append({
            "id": node_id,
            "type": "file",
            "name": rel_path.split("/")[-1],
            "filePath": rel_path,
            "layer": get_layer(rel_path),
            "summary": get_file_summary(rel_path, data.get("docstring", "")),
            "tags": tags,
            "complexity": "moderate" if data["classes"] or data["functions"] else "simple",
        })
        file_node_ids[rel_path] = node_id

    # 2. Create class nodes
    # Inheritance is recorded here and resolved after every class is known,
    # so an "extends" edge does not depend on the base class having been
    # scanned before its subclasses.
    pending_extends = []
    for rel_path, data in file_data.items():
        layer = get_layer(rel_path)
        for cls in data["classes"]:
            class_id = f"class:{cls['name']}"

            method_names = [m["name"] for m in cls["methods"]]
            # Use docstring for summary if available
            docstring = cls.get("docstring", "")
            if docstring:
                # Take first line of docstring
                summary = docstring.strip().split("\n")[0].strip()
            else:
                summary = f"{cls['name']}类"
                if cls["base_classes"]:
                    summary += f"，继承自{', '.join(cls['base_classes'])}"
                if method_names:
                    summary += f"，包含方法: {', '.join(method_names[:5])}"
                    if len(method_names) > 5:
                        summary += f" 等{len(method_names)}个方法"

            nodes.append({
                "id": class_id,
                "type": "class",
                "name": cls["name"],
                "filePath": rel_path,
                "layer": layer,
                "summary": summary,
                "tags": [cls["name"]],
                "complexity": cls["complexity"],
            })
            class_node_ids[cls["name"]] = class_id

            # Edge: file contains class
            add_edge(file_node_ids[rel_path], class_id, "contains", f"定义类 {cls['name']}")

            for base in cls["base_classes"]:
                pending_extends.append((class_id, base))

            # 3. Create method nodes
            for method in cls["methods"]:
                qualified_name = f"{cls['name']}.{method['name']}"
                method_id = f"func:{qualified_name}"
                async_tag = "异步" if method["is_async"] else ""

                nodes.append({
                    "id": method_id,
                    "type": "function",
                    "name": method["name"],
                    "filePath": rel_path,
                    "layer": layer,
                    "summary": f"{qualified_name}({', '.join(method['params'])}) {async_tag}方法",
                    "tags": [cls["name"], method["name"]],
                    "complexity": "simple",
                })
                func_node_ids[qualified_name] = method_id

                # Edge: class contains method
                add_edge(class_id, method_id, "contains", f"方法 {method['name']}")

    # Edge: class extends base classes (only bases defined in scanned files)
    for class_id, base in pending_extends:
        if base in class_node_ids:
            add_edge(class_id, class_node_ids[base], "extends", f"继承 {base}")

    # 4. Create top-level function nodes
    for rel_path, data in file_data.items():
        for func in data["functions"]:
            func_id = f"func:{func['name']}"
            async_tag = "异步" if func["is_async"] else ""
            summary = f"{func['name']}({', '.join(func['params'])}) {async_tag}函数"
            if func["return_type"]:
                summary += f" → {func['return_type']}"

            nodes.append({
                "id": func_id,
                "type": "function",
                "name": func["name"],
                "filePath": rel_path,
                "layer": get_layer(rel_path),
                "summary": summary,
                "tags": [func["name"]],
                "complexity": func["complexity"],
            })
            func_node_ids[func["name"]] = func_id

            # Edge: file contains function
            add_edge(file_node_ids[rel_path], func_id, "contains", f"定义函数 {func['name']}")

    # 5. Create import edges
    for rel_path, data in file_data.items():
        for imp in data["imports"]:
            # "from x import y" targets module x; a plain "import x" has no
            # from_module, so the imported name itself is the module.
            module = imp["from_module"] or imp["import_name"]
            target_path = module_to_file_path(module)
            if target_path and target_path in file_node_ids:
                add_edge(
                    file_node_ids[rel_path],
                    file_node_ids[target_path],
                    "imports",
                    f"导入 {imp['import_name']}",
                )

    # 6. Build tours
    tours = build_tours(file_data, file_node_ids, class_node_ids, func_node_ids)

    # Commit hash recorded in the output (a fixed value, not read from git)
    git_hash = "d9d1b16e5911ad958cd8ae38958058bea13f3fcc"

    # Build final JSON
    graph = {
        "version": "1.0.0",
        "project": {
            "name": "Fischer AgentKit",
            "languages": ["python"],
            "frameworks": ["FastAPI", "Pydantic", "SQLAlchemy", "Typer", "Redis"],
            "description": "AI驱动的Agent框架，支持ReAct引擎、多LLM网关、Pipeline编排、自适应反思和消息总线",
            "analyzedAt": datetime.now(timezone.utc).isoformat(),
            "gitCommitHash": git_hash,
        },
        "nodes": nodes,
        "edges": edges,
        "tours": tours,
    }

    return graph


def build_tours(file_data, file_node_ids, class_node_ids, func_node_ids):
    """Build guided learning tours.

    The tours are a fixed, hand-written catalogue; none of the arguments is
    consulted, so the referenced node ids are not checked against the graph.

    Returns:
        list[dict]: One dict per tour with the keys ``id``, ``name``,
        ``description`` and ``steps``; every step is a dict with ``nodeId``
        and ``why``.
    """
    # Each entry: (tour id, name, description, ((node id, why), ...))
    catalogue = (
        # Tour 1: Entry Points
        (
            "tour:entry-points",
            "入口点导览",
            "从项目入口开始，了解如何启动和使用AgentKit",
            (
                ("file:src/agentkit/__main__.py", "Python模块入口，python -m agentkit"),
                ("file:src/agentkit/__init__.py", "包入口，导出核心公共API"),
                ("file:src/agentkit/cli/main.py", "CLI主入口，Typer应用定义"),
                ("file:src/agentkit/server/app.py", "HTTP服务入口，FastAPI应用创建"),
            ),
        ),
        # Tour 2: Core Agent Lifecycle
        (
            "tour:agent-lifecycle",
            "Agent生命周期导览",
            "深入理解Agent从创建到执行任务的完整生命周期",
            (
                ("class:BaseAgent", "Agent基类，定义标准生命周期和可插拔能力"),
                ("func:BaseAgent.start", "Agent启动流程：连接Redis→注册→心跳→监听"),
                ("func:BaseAgent.execute", "任务执行框架方法：on_task_start→handle_task→quality_gate→on_task_complete"),
                ("func:BaseAgent.handle_task", "抽象方法，子类实现业务逻辑"),
                ("class:ConfigDrivenAgent", "配置驱动Agent，从YAML自动组装"),
                ("func:ConfigDrivenAgent.handle_task", "根据execution_mode路由到react/direct/custom模式"),
                ("class:AgentConfig", "Agent配置模型，支持YAML/Dict构建"),
            ),
        ),
        # Tour 3: ReAct Engine
        (
            "tour:react-engine",
            "ReAct引擎导览",
            "理解ReAct推理-行动循环的核心实现",
            (
                ("class:ReActEngine", "ReAct引擎核心，Think→Act→Observe循环"),
                ("func:ReActEngine.execute", "执行ReAct循环，支持超时和取消"),
                ("func:ReActEngine.execute_stream", "流式执行，逐步yield事件"),
                ("func:ReActEngine._execute_tool", "工具调用执行，处理成功和失败"),
                ("func:ReActEngine._parse_text_tool_calls", "文本解析模式，支持Action和代码块格式"),
                ("class:ReActStep", "单步记录数据结构"),
                ("class:ReActResult", "ReAct执行结果数据结构"),
                ("class:ReActEvent", "流式执行事件数据结构"),
            ),
        ),
        # Tour 4: LLM Gateway
        (
            "tour:llm-gateway",
            "LLM网关导览",
            "了解多Provider统一网关的设计和实现",
            (
                ("class:LLMGateway", "LLM网关核心，统一多Provider调用接口"),
                ("file:src/agentkit/llm/protocol.py", "LLM协议定义，LLMProvider/LLMRequest/LLMResponse"),
                ("file:src/agentkit/llm/config.py", "模型别名和Provider配置"),
                ("file:src/agentkit/llm/providers/openai.py", "OpenAI Provider实现"),
                ("file:src/agentkit/llm/providers/anthropic.py", "Anthropic Provider实现"),
                ("file:src/agentkit/llm/retry.py", "LLM重试策略"),
            ),
        ),
        # Tour 5: Memory System
        (
            "tour:memory-system",
            "记忆系统导览",
            "理解多层记忆系统的架构和实现",
            (
                ("file:src/agentkit/memory/base.py", "记忆基类接口定义"),
                ("file:src/agentkit/memory/retriever.py", "统一记忆检索器，整合工作/情景/语义记忆"),
                ("file:src/agentkit/memory/working.py", "工作记忆 - 基于Redis的短期记忆"),
                ("file:src/agentkit/memory/episodic.py", "情景记忆 - 基于向量的长期记忆"),
                ("file:src/agentkit/memory/semantic.py", "语义记忆 - RAG服务集成"),
                ("file:src/agentkit/memory/embedder.py", "文本向量化嵌入器"),
            ),
        ),
        # Tour 6: Orchestration
        (
            "tour:orchestration",
            "编排系统导览",
            "了解多Agent协作编排和Pipeline引擎",
            (
                ("class:Orchestrator", "多Agent协作编排器，Orchestrator-Worker模式"),
                ("func:Orchestrator.execute", "编排执行：分解→执行→汇总"),
                ("func:Orchestrator.execute_adaptive", "自适应编排：执行→评估→再分解循环"),
                ("file:src/agentkit/orchestrator/pipeline_engine.py", "Pipeline引擎，执行DAG工作流"),
                ("file:src/agentkit/orchestrator/pipeline_schema.py", "Pipeline配置模型"),
                ("file:src/agentkit/orchestrator/reflection.py", "执行后反思模块"),
            ),
        ),
        # Tour 7: Skills & Router
        (
            "tour:skills-router",
            "技能与路由导览",
            "了解技能定义、注册和意图路由机制",
            (
                ("file:src/agentkit/skills/base.py", "技能基类和配置定义"),
                ("class:SkillRegistry", "技能注册中心"),
                ("file:src/agentkit/skills/loader.py", "从YAML加载技能定义"),
                ("class:IntentRouter", "意图路由器，匹配用户输入到技能"),
                ("file:src/agentkit/router/intent.py", "意图路由实现"),
            ),
        ),
        # Tour 8: Evolution
        (
            "tour:evolution",
            "进化系统导览",
            "了解Agent自我进化的机制和实现",
            (
                ("file:src/agentkit/evolution/lifecycle.py", "进化生命周期Mixin"),
                ("file:src/agentkit/evolution/reflector.py", "反思器 - 分析结果生成改进建议"),
                ("file:src/agentkit/evolution/prompt_optimizer.py", "Prompt自动优化"),
                ("file:src/agentkit/evolution/genetic.py", "遗传算法进化"),
                ("file:src/agentkit/evolution/ab_tester.py", "A/B测试对比"),
            ),
        ),
        # Tour 9: Infrastructure
        (
            "tour:infrastructure",
            "基础设施导览",
            "了解消息总线、会话管理、遥测等基础设施",
            (
                ("file:src/agentkit/bus/protocol.py", "消息总线协议接口"),
                ("file:src/agentkit/bus/redis_bus.py", "Redis Pub/Sub消息总线"),
                ("file:src/agentkit/bus/memory_bus.py", "进程内消息总线"),
                ("file:src/agentkit/session/manager.py", "会话管理器"),
                ("file:src/agentkit/telemetry/tracing.py", "OpenTelemetry追踪集成"),
                ("file:src/agentkit/telemetry/metrics.py", "运行指标收集"),
            ),
        ),
    )

    return [
        {
            "id": tour_id,
            "name": title,
            "description": blurb,
            "steps": [
                {"nodeId": node_id, "why": reason} for node_id, reason in stops
            ],
        }
        for tour_id, title, blurb, stops in catalogue
    ]


def main():
    """Build the knowledge graph, write it to OUTPUT_PATH and print statistics.

    The graph is serialised as indented UTF-8 JSON (non-ASCII characters are
    written as-is).  Afterwards the node/edge/tour totals and the number of
    nodes per layer and per node type are printed to stdout.
    """
    print("Building knowledge graph for Fischer AgentKit...")

    graph = build_knowledge_graph()

    def report_distribution(heading, field):
        # Tally nodes by the given field, then print the counts sorted by value.
        tally = {}
        for entry in graph["nodes"]:
            value = entry[field]
            tally[value] = tally.get(value, 0) + 1
        print(heading)
        for value, total in sorted(tally.items()):
            print(f"  {value}: {total} nodes")

    # Make sure the target directory is there before writing the JSON file.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with OUTPUT_PATH.open("w", encoding="utf-8") as handle:
        json.dump(graph, handle, ensure_ascii=False, indent=2)

    print(f"Knowledge graph written to {OUTPUT_PATH}")
    for label, section in (("Nodes", "nodes"), ("Edges", "edges"), ("Tours", "tours")):
        print(f"  {label}: {len(graph[section])}")

    report_distribution("\nLayer distribution:", "layer")
    report_distribution("\nNode type distribution:", "type")


# Run the builder only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()