# Listing metadata captured from the file viewer (not code): 350 lines, 13 KiB, Python.
import copy
import json
import logging
import uuid
from datetime import datetime, timezone

from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.schema_suggestion import SchemaSuggestion
from app.services.llm import LLMFactory, LLMError
from app.prompts.schema_advisor import SCHEMA_ADVISOR_TEMPLATE
from app.utils.json_extractor import extract_json

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Skeleton JSON-LD documents keyed by schema.org type name.
# Empty strings / empty lists are placeholders to be filled in later; callers
# in this file deep-copy a template before handing it out, so these dicts are
# never mutated in place.
SCHEMA_TEMPLATES: dict[str, dict] = {
    "Organization": {
        "@context": "https://schema.org",
        "@type": "Organization",
        "name": "",
        "description": "",
        "url": "",
        "logo": "",
        "sameAs": [],
        "contactPoint": {
            "@type": "ContactPoint",
            "contactType": "customer service",
            "telephone": "",
        },
    },
    "Product": {
        "@context": "https://schema.org",
        "@type": "Product",
        "name": "",
        "description": "",
        "brand": {"@type": "Brand", "name": ""},
        "offers": {
            "@type": "Offer",
            "priceCurrency": "CNY",
            "availability": "https://schema.org/InStock",
        },
    },
    "FAQPage": {
        "@context": "https://schema.org",
        "@type": "FAQPage",
        # A single empty question/answer pair serves as the repeatable unit.
        "mainEntity": [
            {
                "@type": "Question",
                "name": "",
                "acceptedAnswer": {
                    "@type": "Answer",
                    "text": "",
                },
            }
        ],
    },
    "Article": {
        "@context": "https://schema.org",
        "@type": "Article",
        "headline": "",
        "description": "",
        "author": {"@type": "Organization", "name": ""},
        "datePublished": "",
        "image": "",
    },
    "LocalBusiness": {
        "@context": "https://schema.org",
        "@type": "LocalBusiness",
        "name": "",
        "address": {
            "@type": "PostalAddress",
            "streetAddress": "",
            "addressLocality": "",
            "addressRegion": "",
            "postalCode": "",
            "addressCountry": "CN",
        },
        "geo": {
            "@type": "GeoCoordinates",
            "latitude": "",
            "longitude": "",
        },
        "telephone": "",
        "openingHours": "",
    },
}

# Diagnosis dimension name -> schema.org types suggested when that dimension
# scores poorly. Dimension names not listed here are ignored by the service.
# NOTE(review): "schema_marketing" may be intended as "schema_markup" —
# confirm against the dimension names the diagnosis step actually emits.
DIMENSION_SCHEMA_MAP: dict[str, list[str]] = {
    "schema_marketing": ["Organization", "LocalBusiness"],
    "entity_clarity": ["Organization", "Product"],
    "citation_readiness": ["FAQPage", "Article"],
    "brand_visibility": ["Organization", "Product"],
    "local_seo": ["LocalBusiness"],
}

# Exclusive upper bounds on a dimension's score percentage:
# below "high" -> high priority, below "medium" -> medium priority,
# anything else -> low priority.
PRIORITY_THRESHOLD: dict[str, float] = {
    "high": 30.0,
    "medium": 60.0,
}

# Implementation difficulty label per schema type; the service falls back to
# "medium" for types not listed here.
DIFFICULTY_MAP: dict[str, str] = {
    "Organization": "easy",
    "Product": "medium",
    "FAQPage": "medium",
    "Article": "easy",
    "LocalBusiness": "hard",
}


class SchemaAdvisorService:
    """Suggest schema.org JSON-LD markup for a brand's weak diagnosis dimensions.

    The generation pipeline picks the dimensions scoring below 80%, maps them
    to JSON-LD templates, asks the default LLM provider to fill each template,
    validates the result and persists one ``SchemaSuggestion`` row per schema
    type. The remaining methods query and update stored suggestions.
    """

    # Dimensions scoring below this percentage are treated as needing markup.
    _MISSING_BELOW_PERCENT = 80

    # Sort rank per priority label; unknown labels rank alongside "medium".
    _PRIORITY_RANK = {"high": 0, "medium": 1, "low": 2}

    # User-facing impact description per schema type.
    _IMPACT_DESCRIPTIONS = {
        "Organization": "增强品牌实体识别,提升AI搜索引擎对品牌的理解和引用概率",
        "Product": "提升产品在搜索结果中的富摘要展示,增加点击率和引用率",
        "FAQPage": "增加FAQ富摘要展示机会,提升在AI回答中的直接引用概率",
        "Article": "优化文章内容的结构化表达,提升AI搜索引擎的内容理解和引用",
        "LocalBusiness": "增强本地搜索可见性,提升地理位置相关查询的引用率",
    }

    async def generate_suggestions(
        self,
        db: AsyncSession,
        brand_id: uuid.UUID,
        diagnosis_data: dict,
        brand_info: dict,
        target_url: str | None = None,
        focus_dimensions: list[str] | None = None,
    ) -> list[SchemaSuggestion]:
        """Create, persist and return schema suggestions for a brand.

        Args:
            db: Session used to add, commit and refresh the new rows.
            brand_id: Brand the suggestions belong to.
            diagnosis_data: Diagnosis result; its ``dimensions`` and
                ``overall_score`` entries are read.
            brand_info: Brand details; ``name``, ``website`` and ``industry``
                are passed to the LLM prompt.
            target_url: Optional URL stored on every suggestion.
            focus_dimensions: When given, only these dimensions are considered.

        Returns:
            The persisted suggestions, ordered high -> medium -> low priority.
        """
        missing_dimensions = self._identify_missing_dimensions(diagnosis_data, focus_dimensions)
        matched = self.match_templates(missing_dimensions)
        filled = await self.fill_template_with_llm(matched, brand_info)

        suggestions = []
        for item in filled:
            json_ld_filled = item.get("json_ld_filled")
            # A failed LLM fill leaves json_ld_filled as None; validating an
            # empty document records that as a validation error on the row.
            validation = self.validate_json_ld(json_ld_filled or {})
            suggestion = SchemaSuggestion(
                brand_id=brand_id,
                schema_type=item["schema_type"],
                target_url=target_url,
                json_ld_template=item["json_ld_template"],
                json_ld_filled=json_ld_filled,
                priority=item["priority"],
                status="pending",
                diagnosis_dimensions=item.get("diagnosis_dimensions"),
                implementation_difficulty=DIFFICULTY_MAP.get(item["schema_type"], "medium"),
                estimated_impact=item.get("estimated_impact"),
                validation_errors=(
                    None if validation["is_valid"] else {"errors": validation["errors"]}
                ),
            )
            db.add(suggestion)
            suggestions.append(suggestion)

        await db.commit()
        for suggestion in suggestions:
            await db.refresh(suggestion)
        return self.prioritize_suggestions(suggestions)

    def match_templates(self, missing_dimensions: list[dict]) -> list[dict]:
        """Map weak dimensions to JSON-LD templates, one entry per schema type.

        Dimensions are visited weakest-first, so a schema type shared by
        several dimensions takes the priority (and recorded dimension) of its
        weakest dimension rather than of whichever dimension was listed first.

        Args:
            missing_dimensions: Dicts with ``dimension``, ``current_score``,
                ``max_score`` and ``percentage`` keys.

        Returns:
            Dicts with ``schema_type``, ``priority``, ``diagnosis_dimensions``
            and a deep-copied ``json_ld_template``.
        """
        matched = []
        seen_types = set()
        # sorted() is stable, so dimensions with equal percentages keep their
        # original relative order.
        for dim in sorted(missing_dimensions, key=lambda d: d["percentage"]):
            for schema_type in DIMENSION_SCHEMA_MAP.get(dim["dimension"], []):
                if schema_type in seen_types:
                    continue
                seen_types.add(schema_type)
                template = SCHEMA_TEMPLATES.get(schema_type)
                if not template:
                    continue
                matched.append({
                    "schema_type": schema_type,
                    "priority": self._priority_for(dim["percentage"]),
                    "diagnosis_dimensions": {
                        "dimension": dim["dimension"],
                        "current_score": dim["current_score"],
                        "max_score": dim["max_score"],
                        "percentage": dim["percentage"],
                    },
                    # Deep copy so callers can mutate the template freely
                    # without touching the shared module-level skeleton.
                    "json_ld_template": copy.deepcopy(template),
                })
        return matched

    async def fill_template_with_llm(self, matched: list[dict], brand_info: dict) -> list[dict]:
        """Ask the default LLM provider for a filled JSON-LD document per item.

        Each item is updated in place with ``json_ld_filled`` (the parsed
        object, or ``None`` when the call or parsing failed) and
        ``estimated_impact``. A failure on one item is logged and does not
        abort the remaining items.

        Args:
            matched: Items produced by ``match_templates``.
            brand_info: Brand details; ``name``, ``website`` and ``industry``
                are read.

        Returns:
            The same item dicts, in the same order.
        """
        provider = LLMFactory.get_default()
        results = []
        for item in matched:
            schema_type = item["schema_type"]
            diagnosis = item.get("diagnosis_dimensions") or {}
            try:
                variables = {
                    "brand_name": brand_info.get("name", ""),
                    "brand_website": brand_info.get("website", ""),
                    "brand_industry": brand_info.get("industry", ""),
                    "schema_type": schema_type,
                    "diagnosis_data": json.dumps(diagnosis, ensure_ascii=False),
                    "existing_schemas": "无",
                }
                messages = SCHEMA_ADVISOR_TEMPLATE.render(variables)
                response = await provider.chat(
                    messages,
                    temperature=0.3,
                    max_tokens=2048,
                )
                raw_json = extract_json(response.content)
                # extract_json's contract is not visible here; guard against a
                # non-string result so json.loads cannot raise an uncaught
                # TypeError and abort the whole batch.
                if not isinstance(raw_json, (str, bytes, bytearray)):
                    raise ValueError("LLM响应中未找到JSON")
                filled = json.loads(raw_json)
                # Only a JSON object can be a JSON-LD document.
                if not isinstance(filled, dict):
                    raise ValueError("LLM返回的JSON-LD不是对象")
            except (LLMError, ValueError) as e:
                # json.JSONDecodeError is a ValueError subclass.
                logger.warning("LLM填充Schema %s 失败: %s", schema_type, e)
                filled = None
            item["json_ld_filled"] = filled
            item["estimated_impact"] = self._generate_impact_description(
                schema_type, diagnosis.get("dimension", "")
            )
            results.append(item)
        return results

    def validate_json_ld(self, json_ld: dict) -> dict:
        """Run basic structural checks on a JSON-LD document.

        Args:
            json_ld: Parsed JSON-LD document.

        Returns:
            ``{"is_valid": bool, "errors": [...], "warnings": [...]}``.
            Missing ``@context``/``@type``, non-object input and
            unserializable content are errors; a non-standard ``@context`` or
            a ``@type`` outside ``SCHEMA_TEMPLATES`` is only a warning.
        """
        if not json_ld:
            return {"is_valid": False, "errors": ["JSON-LD为空"], "warnings": []}
        if not isinstance(json_ld, dict):
            return {"is_valid": False, "errors": ["JSON-LD必须是JSON对象"], "warnings": []}

        errors = []
        warnings = []

        if "@context" not in json_ld:
            errors.append("缺少@context字段")
        elif json_ld["@context"] != "https://schema.org":
            warnings.append(f"@context值非标准: {json_ld['@context']}")

        if "@type" not in json_ld:
            errors.append("缺少@type字段")
        else:
            declared_type = json_ld["@type"]
            # JSON-LD allows @type to be an array; a list is unhashable, so it
            # must not be used directly in a dict membership test.
            candidates = declared_type if isinstance(declared_type, list) else [declared_type]
            recommended = bool(candidates) and all(
                isinstance(t, str) and t in SCHEMA_TEMPLATES for t in candidates
            )
            if not recommended:
                warnings.append(f"@type非推荐类型: {declared_type}")

        try:
            json.dumps(json_ld)
        except (TypeError, ValueError) as e:
            # dumps raises TypeError for unserializable values and ValueError
            # for circular references.
            errors.append(f"JSON序列化失败: {e}")

        return {
            "is_valid": not errors,
            "errors": errors,
            "warnings": warnings,
        }

    def prioritize_suggestions(self, suggestions: list[SchemaSuggestion]) -> list[SchemaSuggestion]:
        """Return a new list ordered high -> medium -> low priority.

        The sort is stable, and unknown priority labels rank with "medium".
        """
        rank = self._PRIORITY_RANK
        return sorted(suggestions, key=lambda s: rank.get(s.priority, 1))

    async def get_suggestions(
        self,
        db: AsyncSession,
        brand_id: uuid.UUID,
        status_filter: str | None = None,
        schema_type: str | None = None,
        skip: int = 0,
        limit: int = 20,
    ) -> tuple[list[SchemaSuggestion], int]:
        """Return one page of a brand's suggestions plus the total match count.

        Args:
            db: Session used for both queries.
            brand_id: Brand whose suggestions are listed.
            status_filter: When truthy, only rows with this status.
            schema_type: When truthy, only rows with this schema type.
            skip: Number of rows to skip.
            limit: Maximum number of rows to return.

        Returns:
            ``(page, total)``. The page is selected newest-first and then
            re-ordered by priority; that re-ordering applies within the page
            only, not across pages.
        """
        conditions = [SchemaSuggestion.brand_id == brand_id]
        if status_filter:
            conditions.append(SchemaSuggestion.status == status_filter)
        if schema_type:
            conditions.append(SchemaSuggestion.schema_type == schema_type)

        count_stmt = select(func.count()).select_from(SchemaSuggestion).where(*conditions)
        total = (await db.execute(count_stmt)).scalar_one()

        page_stmt = (
            select(SchemaSuggestion)
            .where(*conditions)
            # created_at alone need not be unique (rows committed together may
            # share a timestamp — TODO confirm how created_at is populated);
            # the id tie-breaker keeps offset/limit paging deterministic.
            .order_by(SchemaSuggestion.created_at.desc(), SchemaSuggestion.id)
            .offset(skip)
            .limit(limit)
        )
        rows = (await db.execute(page_stmt)).scalars().all()
        return self.prioritize_suggestions(list(rows)), total

    async def get_suggestion_by_id(
        self,
        db: AsyncSession,
        suggestion_id: uuid.UUID,
    ) -> SchemaSuggestion | None:
        """Return the suggestion with the given id, or ``None`` if absent."""
        stmt = select(SchemaSuggestion).where(SchemaSuggestion.id == suggestion_id)
        result = await db.execute(stmt)
        return result.scalar_one_or_none()

    async def update_status(
        self,
        db: AsyncSession,
        suggestion_id: uuid.UUID,
        new_status: str,
    ) -> SchemaSuggestion | None:
        """Set a suggestion's status, commit, and return the refreshed row.

        Returns ``None`` without committing when no such suggestion exists.
        ``new_status`` is stored as given; it is not validated here.
        """
        suggestion = await self.get_suggestion_by_id(db, suggestion_id)
        if suggestion is None:
            return None
        suggestion.status = new_status
        await db.commit()
        await db.refresh(suggestion)
        return suggestion

    def _identify_missing_dimensions(
        self,
        diagnosis_data: dict,
        focus_dimensions: list[str] | None = None,
    ) -> list[dict]:
        """List the known dimensions that score below 80%.

        Each entry of ``diagnosis_data["dimensions"]`` is either a bare score
        (out of 100) or a dict with ``score`` and ``max_score``. Missing or
        non-numeric values fall back to 0 and 100 respectively. If no
        dimension qualifies but ``overall_score`` is below 80, every known
        dimension (subject to ``focus_dimensions``) is reported at 0%.

        Returns:
            Dicts with ``dimension``, ``current_score``, ``max_score`` and
            ``percentage`` keys.
        """
        if not diagnosis_data:
            return []

        dimension_scores = diagnosis_data.get("dimensions") or {}
        if not isinstance(dimension_scores, dict):
            dimension_scores = {}

        dimensions = []
        for dim_name, dim_info in dimension_scores.items():
            if dim_name not in DIMENSION_SCHEMA_MAP:
                continue
            if focus_dimensions and dim_name not in focus_dimensions:
                continue
            if isinstance(dim_info, dict):
                score = self._to_number(dim_info.get("score", 0), 0)
                max_score = self._to_number(dim_info.get("max_score", 100), 100)
            else:
                score = self._to_number(dim_info, 0)
                max_score = 100
            percentage = (score / max_score * 100) if max_score > 0 else 0
            if percentage < self._MISSING_BELOW_PERCENT:
                dimensions.append({
                    "dimension": dim_name,
                    "current_score": round(score, 2),
                    "max_score": max_score,
                    "percentage": round(percentage, 2),
                })

        if not dimensions:
            overall = self._to_number(diagnosis_data.get("overall_score", 0), 0)
            if overall < self._MISSING_BELOW_PERCENT:
                for dim_name in DIMENSION_SCHEMA_MAP:
                    if focus_dimensions and dim_name not in focus_dimensions:
                        continue
                    dimensions.append({
                        "dimension": dim_name,
                        "current_score": 0,
                        "max_score": 100,
                        "percentage": 0,
                    })
        return dimensions

    def _generate_impact_description(self, schema_type: str, dimension: str) -> str:
        """Return the impact text for a schema type.

        Unknown schema types get a generic sentence naming ``dimension``.
        """
        return self._IMPACT_DESCRIPTIONS.get(
            schema_type, f"提升{dimension}维度的得分和AI引用率"
        )

    @staticmethod
    def _priority_for(percentage: float) -> str:
        """Translate a score percentage into a priority label."""
        if percentage < PRIORITY_THRESHOLD["high"]:
            return "high"
        if percentage < PRIORITY_THRESHOLD["medium"]:
            return "medium"
        return "low"

    @staticmethod
    def _to_number(value, default: float) -> float:
        """Return ``value`` as a number, or ``default`` if it is not numeric."""
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return default