geo/backend/app/services/content/content_pipeline.py

153 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import time
from dataclasses import dataclass, field
from typing import Optional, Any, List
from app.services.content.rule_validator import RuleValidator
from app.services.content.sensitive_filter import SensitiveFilter
from app.services.content.seo_optimizer import SEOOptimizer
from app.services.content.html_generator import HTMLGenerator
@dataclass
class PipelineStage:
    """Record of what happened in a single pipeline stage.

    Attributes:
        name: Identifier of the stage (for example ``"validation"``).
        passed: ``True`` when the stage is considered successful.
        result: Stage-specific result object; ``None`` when there is none.
        duration: Elapsed time spent in the stage, in seconds.
        error: Error message when the stage failed, otherwise ``None``.
    """

    # Required fields come first so positional construction keeps working.
    name: str
    passed: bool
    # Optional details, all defaulting to "nothing recorded".
    result: Any = None
    duration: float = 0.0
    error: Optional[str] = None
@dataclass
class PipelineOutput:
    """Rendered forms of the processed content.

    Every field defaults to the empty string, so a format that was not
    produced is simply left empty.

    Attributes:
        html: HTML rendering.
        markdown: Markdown rendering.
        plain: Plain-text rendering.
    """

    html: str = ""
    markdown: str = ""
    plain: str = ""
@dataclass
class PipelineResponse:
    """Overall result of one pipeline run.

    Attributes:
        stages: Per-stage records, in the order the stages were appended.
            Each instance gets its own fresh list.
        outputs: The rendered outputs, or ``None`` when none were produced.
        error: Error message for the run, or ``None`` when there is none.
    """

    # default_factory avoids sharing one mutable list between instances.
    stages: List[PipelineStage] = field(default_factory=list)
    outputs: Optional[PipelineOutput] = None
    error: Optional[str] = None
class ContentPipeline:
    """Run content through validation, sensitive-word filtering, SEO and rendering.

    Each step is recorded as a ``PipelineStage`` so callers can see what ran,
    how long it took and whether it failed.
    """

    def __init__(self):
        """Create one instance of each collaborator used by the stages."""
        self.validator = RuleValidator()
        self.sensitive_filter = SensitiveFilter()
        self.seo_optimizer = SEOOptimizer()
        self.html_generator = HTMLGenerator()

    @staticmethod
    def _failed_stage(name: str, exc: Exception, started: float) -> PipelineStage:
        """Build the record for stage ``name`` that raised ``exc``.

        ``started`` is the ``time.perf_counter()`` reading taken when the
        stage began; the duration is measured up to the moment of the call.
        """
        return PipelineStage(
            name=name,
            passed=False,
            error=str(exc),
            duration=time.perf_counter() - started,
        )

    async def run(self, request: dict) -> PipelineResponse:
        """Run the full content-processing pipeline.

        Args:
            request: Mapping with the following keys, all optional:

                * ``content``, ``title``, ``platform`` - default to ``""``.
                * ``optimize_for`` - which optional stages to run
                  (``"validation"``, ``"sensitive"``, ``"seo"``). Missing or
                  ``None`` means ``["validation"]``; an empty collection
                  runs none of them.
                * ``output_formats`` - any of ``"html"``, ``"markdown"``,
                  ``"plain"``. Missing means all three; ``None`` or an empty
                  collection means HTML only.
                * ``keyword`` - passed to the SEO optimizer, default ``""``.

        Returns:
            A ``PipelineResponse``. ``outputs`` is set only when rendering
            succeeded. ``error`` is set when validation rejected the content,
            when validation or rendering raised, or on any unexpected
            exception. Failures of the sensitive-filter and SEO stages are
            recorded in ``stages`` but do not stop the run.
        """
        stages = []
        content = request.get("content", "")
        title = request.get("title", "")
        platform = request.get("platform", "")

        # An explicit null is treated like a missing key; testing membership
        # on None would raise TypeError and fail the whole run.
        optimize_for = request.get("optimize_for")
        if optimize_for is None:
            optimize_for = ["validation"]

        # "No formats requested" (None or empty) falls back to HTML only.
        # The check must happen before any membership test on the value.
        output_formats = request.get("output_formats", ["html", "markdown", "plain"])
        if not output_formats:
            output_formats = ["html"]

        current_content = content
        try:
            # Stage 1: rule validation.
            if "validation" in optimize_for:
                started = time.perf_counter()
                try:
                    validation_result = self.validator.validate(current_content, title, platform)
                    elapsed = time.perf_counter() - started
                    is_valid = validation_result.is_valid
                except Exception as exc:
                    stages.append(self._failed_stage("validation", exc, started))
                    return PipelineResponse(stages=stages, error=str(exc))
                stages.append(PipelineStage(
                    name="validation",
                    passed=is_valid,
                    result=validation_result,
                    duration=elapsed,
                ))
                # Abort as soon as the validator reports the content invalid.
                # What makes content invalid is decided by the validator.
                if not is_valid:
                    return PipelineResponse(
                        stages=stages,
                        outputs=None,
                        error="内容校验未通过",
                    )

            # Stage 2: sensitive-word filtering.
            if "sensitive" in optimize_for:
                started = time.perf_counter()
                try:
                    filter_result = self.sensitive_filter.filter(current_content, platform)
                    elapsed = time.perf_counter() - started
                    filtered_content = filter_result.filtered_content
                except Exception as exc:
                    # Best effort: the failure is recorded and the run goes on
                    # with the unfiltered content.
                    # NOTE(review): confirm that emitting unfiltered content
                    # after a filter failure is acceptable.
                    stages.append(self._failed_stage("sensitive_filter", exc, started))
                else:
                    current_content = filtered_content
                    stages.append(PipelineStage(
                        name="sensitive_filter",
                        passed=True,
                        result=filter_result,
                        duration=elapsed,
                    ))

            # Stage 3: SEO optimisation.
            # NOTE(review): the SEO result is only recorded on the stage; it
            # is not fed into the rendered outputs - confirm this is intended.
            if "seo" in optimize_for:
                started = time.perf_counter()
                try:
                    keyword = request.get("keyword", "")
                    seo_result = self.seo_optimizer.optimize(current_content, title, platform, keyword)
                    elapsed = time.perf_counter() - started
                except Exception as exc:
                    stages.append(self._failed_stage("seo_optimization", exc, started))
                else:
                    stages.append(PipelineStage(
                        name="seo_optimization",
                        passed=True,
                        result=seo_result,
                        duration=elapsed,
                    ))

            # Stage 4: render the requested output formats. This stage always
            # runs; it is timed and records its own failure like the others.
            started = time.perf_counter()
            outputs = PipelineOutput()
            try:
                if "html" in output_formats:
                    outputs.html = self.html_generator.generate(current_content, platform, "html")
                if "markdown" in output_formats:
                    outputs.markdown = self.html_generator.to_markdown(current_content)
                if "plain" in output_formats:
                    outputs.plain = self.html_generator.to_plain(current_content)
            except Exception as exc:
                stages.append(self._failed_stage("html_generation", exc, started))
                return PipelineResponse(stages=stages, error=str(exc))
            stages.append(PipelineStage(
                name="html_generation",
                passed=True,
                result=outputs,
                duration=time.perf_counter() - started,
            ))
            return PipelineResponse(stages=stages, outputs=outputs)
        except Exception as exc:
            # Last-resort boundary: report anything unexpected (for example a
            # non-iterable ``optimize_for``) instead of raising to the caller.
            return PipelineResponse(stages=stages, error=str(exc))

    async def validate_only(self, content: str, title: str, platform: str):
        """Run only rule validation and return the validator's result unchanged."""
        return self.validator.validate(content, title, platform)