"""Schema advisor service.

Builds schema.org JSON-LD suggestions for a brand from diagnosis scores:
matches low-scoring dimensions to JSON-LD templates, asks the default LLM
provider to fill each template, validates the result, and persists the
suggestions as ``SchemaSuggestion`` rows.
"""

import copy
import json
import logging
import uuid
from datetime import datetime, timezone

from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.schema_suggestion import SchemaSuggestion
from app.services.llm import LLMFactory, LLMError
from app.prompts.schema_advisor import SCHEMA_ADVISOR_TEMPLATE
from app.utils.json_extractor import extract_json

logger = logging.getLogger(__name__)

# Blank JSON-LD skeletons, keyed by schema.org type name.
SCHEMA_TEMPLATES: dict[str, dict] = {
    "Organization": {
        "@context": "https://schema.org",
        "@type": "Organization",
        "name": "",
        "description": "",
        "url": "",
        "logo": "",
        "sameAs": [],
        "contactPoint": {
            "@type": "ContactPoint",
            "contactType": "customer service",
            "telephone": "",
        },
    },
    "Product": {
        "@context": "https://schema.org",
        "@type": "Product",
        "name": "",
        "description": "",
        "brand": {"@type": "Brand", "name": ""},
        "offers": {
            "@type": "Offer",
            "priceCurrency": "CNY",
            "availability": "https://schema.org/InStock",
        },
    },
    "FAQPage": {
        "@context": "https://schema.org",
        "@type": "FAQPage",
        "mainEntity": [
            {
                "@type": "Question",
                "name": "",
                "acceptedAnswer": {
                    "@type": "Answer",
                    "text": "",
                },
            }
        ],
    },
    "Article": {
        "@context": "https://schema.org",
        "@type": "Article",
        "headline": "",
        "description": "",
        "author": {"@type": "Organization", "name": ""},
        "datePublished": "",
        "image": "",
    },
    "LocalBusiness": {
        "@context": "https://schema.org",
        "@type": "LocalBusiness",
        "name": "",
        "address": {
            "@type": "PostalAddress",
            "streetAddress": "",
            "addressLocality": "",
            "addressRegion": "",
            "postalCode": "",
            "addressCountry": "CN",
        },
        "geo": {
            "@type": "GeoCoordinates",
            "latitude": "",
            "longitude": "",
        },
        "telephone": "",
        "openingHours": "",
    },
}

# Diagnosis dimension name -> schema types suggested when that dimension is weak.
DIMENSION_SCHEMA_MAP: dict[str, list[str]] = {
    "schema_marketing": ["Organization", "LocalBusiness"],
    "entity_clarity": ["Organization", "Product"],
    "citation_readiness": ["FAQPage", "Article"],
    "brand_visibility": ["Organization", "Product"],
    "local_seo": ["LocalBusiness"],
}

# Upper bounds (exclusive) on the score percentage for each priority level;
# anything at or above the "medium" bound is "low" priority.
PRIORITY_THRESHOLD: dict[str, float] = {
    "high": 30.0,
    "medium": 60.0,
}

# Implementation difficulty label stored with each suggestion, per schema type.
DIFFICULTY_MAP: dict[str, str] = {
    "Organization": "easy",
    "Product": "medium",
    "FAQPage": "medium",
    "Article": "easy",
    "LocalBusiness": "hard",
}


def _priority_for(percentage: float) -> str:
    """Map a score percentage to "high", "medium" or "low" priority."""
    if percentage < PRIORITY_THRESHOLD["high"]:
        return "high"
    if percentage < PRIORITY_THRESHOLD["medium"]:
        return "medium"
    return "low"


def _coalesce(value, default):
    """Return *default* when *value* is None, otherwise *value* unchanged."""
    return default if value is None else value


class SchemaAdvisorService:
    """Generate, validate, persist and query JSON-LD schema suggestions."""

    async def generate_suggestions(
        self,
        db: AsyncSession,
        brand_id: uuid.UUID,
        diagnosis_data: dict,
        brand_info: dict,
        target_url: str | None = None,
        focus_dimensions: list[str] | None = None,
    ) -> list[SchemaSuggestion]:
        """Create and persist suggestions for the weak dimensions of a diagnosis.

        Args:
            db: Session the new rows are added to and committed on.
            brand_id: Brand the suggestions belong to.
            diagnosis_data: Diagnosis result; read via its ``"dimensions"``
                and ``"overall_score"`` keys.
            brand_info: Brand attributes; ``"name"``, ``"website"`` and
                ``"industry"`` are passed to the LLM prompt.
            target_url: Optional URL stored on every suggestion.
            focus_dimensions: If given, only these dimensions are considered.

        Returns:
            The persisted suggestions, ordered high -> medium -> low priority.
        """
        missing_dimensions = self._identify_missing_dimensions(diagnosis_data, focus_dimensions)
        matched = self.match_templates(missing_dimensions)
        filled = await self.fill_template_with_llm(matched, brand_info)

        suggestions = []
        for item in filled:
            # A failed LLM fill leaves json_ld_filled as None; validating {}
            # records that as an "empty JSON-LD" validation error.
            validation = self.validate_json_ld(item.get("json_ld_filled") or {})
            suggestion = SchemaSuggestion(
                brand_id=brand_id,
                schema_type=item["schema_type"],
                target_url=target_url,
                json_ld_template=item["json_ld_template"],
                json_ld_filled=item.get("json_ld_filled"),
                priority=item["priority"],
                status="pending",
                diagnosis_dimensions=item.get("diagnosis_dimensions"),
                implementation_difficulty=DIFFICULTY_MAP.get(item["schema_type"], "medium"),
                estimated_impact=item.get("estimated_impact"),
                validation_errors=(
                    None if validation["is_valid"] else {"errors": validation["errors"]}
                ),
            )
            db.add(suggestion)
            suggestions.append(suggestion)

        await db.commit()
        for s in suggestions:
            await db.refresh(s)
        return self.prioritize_suggestions(suggestions)

    def match_templates(self, missing_dimensions: list[dict]) -> list[dict]:
        """Pick one JSON-LD template per schema type for the given dimensions.

        Each schema type is emitted at most once; the first dimension that
        maps to it determines its priority and recorded diagnosis figures.

        Args:
            missing_dimensions: Dicts with ``"dimension"``, ``"current_score"``,
                ``"max_score"`` and ``"percentage"`` keys.

        Returns:
            One dict per matched schema type with ``"schema_type"``,
            ``"priority"``, ``"diagnosis_dimensions"`` and a deep-copied
            ``"json_ld_template"``.
        """
        matched = []
        seen_types = set()
        for dim in missing_dimensions:
            for schema_type in DIMENSION_SCHEMA_MAP.get(dim["dimension"], []):
                if schema_type in seen_types:
                    continue
                seen_types.add(schema_type)
                template = SCHEMA_TEMPLATES.get(schema_type)
                if not template:
                    continue
                matched.append({
                    "schema_type": schema_type,
                    "priority": _priority_for(dim["percentage"]),
                    "diagnosis_dimensions": {
                        "dimension": dim["dimension"],
                        "current_score": dim["current_score"],
                        "max_score": dim["max_score"],
                        "percentage": dim["percentage"],
                    },
                    # Deep copy so callers can mutate without touching the
                    # shared module-level template.
                    "json_ld_template": copy.deepcopy(template),
                })
        return matched

    async def fill_template_with_llm(self, matched: list[dict], brand_info: dict) -> list[dict]:
        """Ask the default LLM provider to fill each matched template.

        Each item is updated in place with ``"json_ld_filled"`` (the parsed
        LLM output, or None when the call or parsing fails) and
        ``"estimated_impact"``. Failures are logged and do not abort the
        remaining items.

        Args:
            matched: Items as produced by ``match_templates``.
            brand_info: Brand attributes rendered into the prompt.

        Returns:
            The same item dicts, in the same order.
        """
        provider = LLMFactory.get_default()
        results = []
        for item in matched:
            schema_type = item["schema_type"]
            try:
                variables = {
                    "brand_name": brand_info.get("name", ""),
                    "brand_website": brand_info.get("website", ""),
                    "brand_industry": brand_info.get("industry", ""),
                    "schema_type": schema_type,
                    "diagnosis_data": json.dumps(
                        item.get("diagnosis_dimensions", {}), ensure_ascii=False
                    ),
                    "existing_schemas": "无",
                }
                messages = SCHEMA_ADVISOR_TEMPLATE.render(variables)
                response = await provider.chat(
                    messages,
                    temperature=0.3,
                    max_tokens=2048,
                )
                item["json_ld_filled"] = json.loads(extract_json(response.content))
            except (json.JSONDecodeError, LLMError, ValueError) as e:
                logger.warning("LLM填充Schema %s 失败: %s", schema_type, e)
                item["json_ld_filled"] = None
            # The impact text does not depend on whether the fill succeeded.
            item["estimated_impact"] = self._generate_impact_description(
                schema_type, item.get("diagnosis_dimensions", {}).get("dimension", "")
            )
            results.append(item)
        return results

    def validate_json_ld(self, json_ld: dict) -> dict:
        """Run basic structural checks on a JSON-LD document.

        Args:
            json_ld: The candidate document. Empty values and non-dict
                values are reported as invalid rather than raising.

        Returns:
            ``{"is_valid": bool, "errors": list[str], "warnings": list[str]}``.
            Missing ``@context``/``@type`` and non-serializable content are
            errors; a non-standard ``@context`` or a ``@type`` outside
            ``SCHEMA_TEMPLATES`` is only a warning.
        """
        errors = []
        warnings = []

        if not json_ld:
            return {"is_valid": False, "errors": ["JSON-LD为空"], "warnings": []}
        if not isinstance(json_ld, dict):
            # LLM output may parse to a list or scalar; the key checks below
            # only make sense for a JSON object.
            return {"is_valid": False, "errors": ["JSON-LD必须是对象"], "warnings": []}

        if "@context" not in json_ld:
            errors.append("缺少@context字段")
        if "@type" not in json_ld:
            errors.append("缺少@type字段")

        if "@context" in json_ld and json_ld["@context"] != "https://schema.org":
            warnings.append(f"@context值非标准: {json_ld.get('@context')}")

        if "@type" in json_ld:
            declared = json_ld["@type"]
            # JSON-LD allows @type to be an array; testing a list directly for
            # dict membership would raise TypeError (unhashable), so check
            # each entry instead.
            type_names = declared if isinstance(declared, list) else [declared]
            recommended = bool(type_names) and all(
                isinstance(name, str) and name in SCHEMA_TEMPLATES for name in type_names
            )
            if not recommended:
                warnings.append(f"@type非推荐类型: {json_ld.get('@type')}")

        try:
            json.dumps(json_ld)
        except (TypeError, ValueError) as e:
            # TypeError: non-serializable value; ValueError: circular reference.
            errors.append(f"JSON序列化失败: {e}")

        return {
            "is_valid": len(errors) == 0,
            "errors": errors,
            "warnings": warnings,
        }

    def prioritize_suggestions(self, suggestions: list[SchemaSuggestion]) -> list[SchemaSuggestion]:
        """Return suggestions sorted high -> medium -> low priority.

        The sort is stable; an unrecognized priority sorts with "medium".
        """
        priority_order = {"high": 0, "medium": 1, "low": 2}
        return sorted(suggestions, key=lambda x: priority_order.get(x.priority, 1))

    async def get_suggestions(
        self,
        db: AsyncSession,
        brand_id: uuid.UUID,
        status_filter: str | None = None,
        schema_type: str | None = None,
        skip: int = 0,
        limit: int = 20,
    ) -> tuple[list[SchemaSuggestion], int]:
        """Fetch one page of a brand's suggestions plus the total match count.

        Args:
            db: Session used for both queries.
            brand_id: Brand whose suggestions are listed.
            status_filter: If set, only suggestions with this status.
            schema_type: If set, only suggestions of this schema type.
            skip: Number of rows to skip.
            limit: Maximum number of rows to return.

        Returns:
            ``(suggestions, total)`` where ``total`` counts all rows matching
            the filters, ignoring ``skip``/``limit``.
        """
        conditions = [SchemaSuggestion.brand_id == brand_id]
        if status_filter:
            conditions.append(SchemaSuggestion.status == status_filter)
        if schema_type:
            conditions.append(SchemaSuggestion.schema_type == schema_type)

        count_stmt = select(func.count()).select_from(SchemaSuggestion).where(*conditions)
        count_result = await db.execute(count_stmt)
        total = count_result.scalar_one()

        stmt = (
            select(SchemaSuggestion)
            .where(*conditions)
            .order_by(SchemaSuggestion.created_at.desc())
            .offset(skip)
            .limit(limit)
        )
        result = await db.execute(stmt)
        suggestions = list(result.scalars().all())
        # NOTE(review): the page is selected by created_at in the database and
        # only then re-sorted by priority, so priority order holds within a
        # page but not across pages — confirm this is intended.
        return self.prioritize_suggestions(suggestions), total

    async def get_suggestion_by_id(
        self,
        db: AsyncSession,
        suggestion_id: uuid.UUID,
    ) -> SchemaSuggestion | None:
        """Return the suggestion with the given id, or None if there is none."""
        stmt = select(SchemaSuggestion).where(SchemaSuggestion.id == suggestion_id)
        result = await db.execute(stmt)
        return result.scalar_one_or_none()

    async def update_status(
        self,
        db: AsyncSession,
        suggestion_id: uuid.UUID,
        new_status: str,
    ) -> SchemaSuggestion | None:
        """Set a suggestion's status and commit.

        Returns:
            The refreshed suggestion, or None when the id does not exist
            (in which case nothing is committed).
        """
        stmt = select(SchemaSuggestion).where(SchemaSuggestion.id == suggestion_id)
        result = await db.execute(stmt)
        suggestion = result.scalar_one_or_none()
        if not suggestion:
            return None
        suggestion.status = new_status
        await db.commit()
        await db.refresh(suggestion)
        return suggestion

    def _identify_missing_dimensions(
        self,
        diagnosis_data: dict,
        focus_dimensions: list[str] | None = None,
    ) -> list[dict]:
        """List the known dimensions scoring below 80 percent.

        Only dimensions present in ``DIMENSION_SCHEMA_MAP`` (and in
        ``focus_dimensions``, when given) are considered. A dimension entry
        may be a dict with ``"score"``/``"max_score"`` or a bare number
        scored out of 100. A None score, max score or overall score is
        treated like a missing one.

        When no dimension qualifies but the overall score is below 80, every
        eligible dimension is reported with a zero score as a fallback.

        Returns:
            Dicts with ``"dimension"``, ``"current_score"``, ``"max_score"``
            and ``"percentage"``.
        """
        dimensions = []
        dimension_scores = diagnosis_data.get("dimensions", {})

        for dim_name, dim_info in dimension_scores.items():
            if dim_name not in DIMENSION_SCHEMA_MAP:
                continue
            if focus_dimensions and dim_name not in focus_dimensions:
                continue

            if isinstance(dim_info, dict):
                score = _coalesce(dim_info.get("score", 0), 0)
                max_score = _coalesce(dim_info.get("max_score", 100), 100)
            else:
                score = _coalesce(dim_info, 0)
                max_score = 100

            # Guard against division by zero for a non-positive max score.
            percentage = (score / max_score * 100) if max_score > 0 else 0
            if percentage < 80:
                dimensions.append({
                    "dimension": dim_name,
                    "current_score": round(score, 2),
                    "max_score": max_score,
                    "percentage": round(percentage, 2),
                })

        if not dimensions and diagnosis_data:
            overall = _coalesce(diagnosis_data.get("overall_score", 0), 0)
            if overall < 80:
                for dim_name in DIMENSION_SCHEMA_MAP:
                    if focus_dimensions and dim_name not in focus_dimensions:
                        continue
                    dimensions.append({
                        "dimension": dim_name,
                        "current_score": 0,
                        "max_score": 100,
                        "percentage": 0,
                    })

        return dimensions

    def _generate_impact_description(self, schema_type: str, dimension: str) -> str:
        """Return the canned impact text for a schema type.

        Falls back to a generic sentence mentioning *dimension* when the
        schema type has no dedicated text.
        """
        impacts = {
            "Organization": "增强品牌实体识别,提升AI搜索引擎对品牌的理解和引用概率",
            "Product": "提升产品在搜索结果中的富摘要展示,增加点击率和引用率",
            "FAQPage": "增加FAQ富摘要展示机会,提升在AI回答中的直接引用概率",
            "Article": "优化文章内容的结构化表达,提升AI搜索引擎的内容理解和引用",
            "LocalBusiness": "增强本地搜索可见性,提升地理位置相关查询的引用率",
        }
        return impacts.get(schema_type, f"提升{dimension}维度的得分和AI引用率")