This commit is contained in:
DengDai
2025-12-08 14:47:24 +08:00
commit 644b5aaaf8
21 changed files with 1543 additions and 0 deletions

0
pt_gen/core/__init__.py Normal file
View File

50
pt_gen/core/config.py Normal file
View File

@@ -0,0 +1,50 @@
import yaml
from functools import lru_cache
from pydantic import BaseModel
from pydantic_settings import BaseSettings
from typing import Optional
# Path of the YAML configuration file opened by get_settings(); it can be
# replaced at runtime through set_config_path().
CONFIG_PATH = "configs/config.yaml"
class TMDBConfig(BaseModel):
    """TMDB section of the configuration file."""

    # Optional API key; defaults to None when the key is absent.
    api_key: Optional[str] = None
class DoubanConfig(BaseModel):
    """Douban section of the configuration file."""

    # Optional cookie string; defaults to None when the key is absent.
    cookie: Optional[str] = None
class RedisConfig(BaseModel):
    """Redis section of the configuration file; every field is required."""

    host: str
    port: int
    db: int
    # Time-to-live, in seconds (per the field name), for cached entries.
    cache_ttl_seconds: int
class UploaderConfig(BaseModel):
    """Image-uploader section of the configuration file.

    All three fields are required, even when ``enable`` is false.
    """

    # Switch for the uploader feature.
    enable: bool
    api_url: str
    api_key: str
class Settings(BaseSettings):
    """Top-level application settings, one attribute per configuration section."""

    # Top-level API key. NOTE(review): its consumer is not visible in this
    # file — presumably authenticates callers of the service; confirm.
    api_key: str
    tmdb: TMDBConfig
    douban: DoubanConfig
    redis: RedisConfig
    uploader: UploaderConfig
def set_config_path(path: str = "configs/config.yaml"):
    """Set the configuration file path used by get_settings().

    Intended to be called at start-up, before the settings are first loaded.

    Args:
        path: Location of the YAML configuration file.
    """
    global CONFIG_PATH
    CONFIG_PATH = path
    # get_settings() is memoised with lru_cache; without clearing it, a path
    # change made after the first load would be silently ignored.
    get_settings.cache_clear()
@lru_cache()  # lru_cache ensures the config file is read and parsed only once
def get_settings() -> Settings:
    """Load the YAML file at CONFIG_PATH and return it as a Settings object.

    This function can be used as a FastAPI dependency.

    Returns:
        The validated Settings instance (cached after the first call).

    Raises:
        RuntimeError: If the file does not exist, cannot be parsed, or does
            not validate against Settings. The original exception is chained
            as ``__cause__``.
    """
    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            # An empty YAML document loads as None; treat it as an empty
            # mapping so validation reports the missing fields.
            config_data = yaml.safe_load(f) or {}
        # model_validate is the pydantic v2 replacement for the deprecated
        # parse_obj (pydantic_settings requires pydantic v2).
        return Settings.model_validate(config_data)
    except FileNotFoundError as e:
        raise RuntimeError(f"配置文件未找到: {CONFIG_PATH}") from e
    except Exception as e:
        raise RuntimeError(f"加载配置文件失败: {e}") from e

177
pt_gen/core/orchestrator.py Normal file
View File

@@ -0,0 +1,177 @@
import re
from typing import Optional
from jinja2 import Environment, FileSystemLoader
from pt_gen.core.config import Settings
from pt_gen.models.movie import MovieInfo
from pt_gen.services.cache import RedisCache
from pt_gen.services.douban import DoubanScraper
from pt_gen.services.tmdb import TMDBClient
from pt_gen.services.uploader import ImageUploader
class InfoOrchestrator:
    """Build a rendered movie description from a Douban, IMDb or TMDB link.

    The orchestrator resolves the link to IDs, consults the cache, gathers
    data from TMDB and Douban, merges it into a MovieInfo object, optionally
    re-hosts the poster image, renders the ``description.jinja2`` template
    and stores the rendered text in the cache.
    """

    # URL patterns, compiled once for the whole class.
    _DOUBAN_URL_RE = re.compile(r'douban\.com/subject/(\d+)')
    _IMDB_URL_RE = re.compile(r'imdb\.com/title/(tt\d+)')
    _TMDB_URL_RE = re.compile(r'themoviedb\.org/(?:movie|tv)/(\d+)')

    def __init__(self, settings: "Settings"):
        """Create the service clients described by *settings*.

        Args:
            settings: Application settings; the ``tmdb``, ``douban``,
                ``redis`` and ``uploader`` sections are read here.
        """
        self.settings = settings
        self.tmdb = TMDBClient(api_key=settings.tmdb.api_key)
        self.douban = DoubanScraper(cookie=settings.douban.cookie)
        self.cache = RedisCache(
            host=settings.redis.host,
            port=settings.redis.port,
            db=settings.redis.db,
        )
        # The uploader is optional; None means posters are not re-hosted.
        if settings.uploader.enable:
            self.uploader = ImageUploader(
                api_url=settings.uploader.api_url,
                api_key=settings.uploader.api_key,
            )
        else:
            self.uploader = None
        self.jinja_env = Environment(loader=FileSystemLoader('templates'))

    @staticmethod
    def _first_group(pattern, url) -> Optional[str]:
        """Return group 1 of *pattern* found in *url*, or None (also for empty input)."""
        if not url:
            return None
        match = pattern.search(url)
        return match.group(1) if match else None

    def _extract_douban_id(self, url: str) -> Optional[str]:
        """Return the numeric Douban subject ID contained in *url*, or None."""
        return self._first_group(self._DOUBAN_URL_RE, url)

    def _extract_imdb_id(self, url: str) -> Optional[str]:
        """Return the IMDb title ID (``tt`` + digits) contained in *url*, or None."""
        return self._first_group(self._IMDB_URL_RE, url)

    def _extract_tmdb_id(self, url: str) -> Optional[str]:
        """Return the numeric TMDB ID from a ``/movie/`` or ``/tv/`` URL, or None."""
        return self._first_group(self._TMDB_URL_RE, url)

    @staticmethod
    def _build_cache_key(imdb_id, douban_id, tmdb_id) -> str:
        """Return the cache key for a title, preferring IMDb, then Douban, then TMDB."""
        if imdb_id:
            return f"movieinfo:{imdb_id}"
        if douban_id:
            # e.g. an obscure Douban title that has no IMDb link
            return f"movieinfo:douban:{douban_id}"
        # A TMDB-only title gets its own namespace instead of sharing one
        # "movieinfo:douban:None" key with every other such title.
        return f"movieinfo:tmdb:{tmdb_id}"

    async def _find_tmdb_id(self, imdb_id):
        """Look up the TMDB movie ID for *imdb_id*; return None when nothing is found."""
        found = await self.tmdb.find_by_imdb_id(imdb_id)
        results = found.get('movie_results') if found else None
        if not results:
            return None
        return results[0].get('id')

    @staticmethod
    def _merge_tmdb(movie, tmdb_data) -> None:
        """Copy TMDB fields onto *movie*; TMDB is the preferred base data set."""
        movie.tmdb_id = tmdb_data.get('id')
        movie.imdb_id = tmdb_data.get('imdb_id')
        movie.original_title = tmdb_data.get('original_title')
        movie.chinese_title = tmdb_data.get('title')
        movie.aka_titles = tmdb_data.get('aka_titles') or []
        # release_date / vote_average may be present but None, so fall back
        # explicitly instead of relying on dict.get defaults.
        movie.year = (tmdb_data.get('release_date') or '')[:4]
        movie.release_date = tmdb_data.get('release_date')
        movie.runtime = tmdb_data.get('runtime')
        movie.spoken_languages = tmdb_data.get('spoken_languages') or []
        movie.tagline = tmdb_data.get('tagline')
        # NOTE(review): this is TMDB's vote_average stored in the
        # imdb_rating field, as in the original code — confirm intent.
        movie.imdb_rating = f"{tmdb_data.get('vote_average') or 0:.1f}/10"
        movie.directors = tmdb_data.get('directors') or []
        movie.writers = tmdb_data.get('writers') or []
        movie.actors = tmdb_data.get('actors') or []
        movie.genres = tmdb_data.get('genres') or []
        movie.countries = tmdb_data.get('countries') or []
        # The TMDB overview may be empty; a Douban synopsis overrides it later.
        movie.synopsis = tmdb_data.get('overview') or ''
        if tmdb_data.get('poster_path'):
            movie.poster_url = f"https://image.tmdb.org/t/p/original{tmdb_data['poster_path']}"

    @staticmethod
    def _merge_douban(movie, douban_data, douban_id) -> None:
        """Fill gaps in *movie* from Douban data and apply the Douban overrides."""
        movie.douban_id = douban_id
        movie.douban_link = f"https://movie.douban.com/subject/{douban_id}/"
        # Gap filling: the Douban value is used only where TMDB gave nothing.
        movie.chinese_title = movie.chinese_title or douban_data.get('chinese_title')
        movie.year = movie.year or douban_data.get('year')
        movie.runtime = movie.runtime or douban_data.get('runtime')
        movie.spoken_languages = movie.spoken_languages or douban_data.get('spoken_languages', [])
        movie.directors = movie.directors or douban_data.get('directors', [])
        movie.writers = movie.writers or douban_data.get('writers', [])
        movie.actors = movie.actors or douban_data.get('actors', [])
        movie.genres = movie.genres or douban_data.get('genres', [])
        movie.countries = movie.countries or douban_data.get('countries', [])
        movie.poster_url = movie.poster_url or douban_data.get('poster_url')
        # Forced override: Douban's synopsis, rating and awards usually suit
        # Chinese-language audiences better.
        if douban_data.get('synopsis'):
            movie.synopsis = douban_data.get('synopsis')
        movie.douban_rating = douban_data.get('douban_rating', 'N/A')
        movie.awards = douban_data.get('awards', [])

    async def generate_info(self, url: str) -> Optional[str]:
        """Return the rendered description for the title behind *url*.

        Args:
            url: A Douban, IMDb or TMDB link.

        Returns:
            The rendered template text (served from the cache when
            available), or a human-readable error message when the link is
            unsupported or no data could be retrieved.
        """
        # --- Step 1: recognise the link type and extract the initial ID ---
        douban_id = self._extract_douban_id(url)
        imdb_id = self._extract_imdb_id(url)
        tmdb_id = self._extract_tmdb_id(url)
        if not (douban_id or imdb_id or tmdb_id):
            return "无效或不支持的链接。请输入有效的豆瓣、IMDb或TMDB链接。"

        douban_data = {}
        tmdb_data = {}

        # --- Step 2: ID completion (the IMDb ID is the canonical one) ---
        # Data fetched here is kept and reused in step 4, so each upstream
        # service is queried at most once per request.
        if tmdb_id and not imdb_id:
            # NOTE(review): /tv/ links are also routed through
            # get_movie_details, as in the original code — confirm that the
            # TMDB client handles TV IDs.
            tmdb_data = await self.tmdb.get_movie_details(tmdb_id) or {}
            imdb_id = tmdb_data.get('imdb_id') or None
        if douban_id and not imdb_id:
            douban_data = await self.douban.scrape_movie_info(douban_id) or {}
            if not douban_data:
                return "从豆瓣获取信息失败请检查链接或Cookie。"
            imdb_id = douban_data.get('imdb_id') or None

        # --- Step 3: cache lookup (for every key type, not only IMDb) ---
        cache_key = self._build_cache_key(imdb_id, douban_id, tmdb_id)
        cached_result = await self.cache.get(cache_key)
        if cached_result:
            print(f"命中缓存: {cache_key}")
            return cached_result

        # --- Step 4: fetch whatever has not been fetched yet ---
        if douban_id and not douban_data:
            douban_data = await self.douban.scrape_movie_info(douban_id) or {}
            if not douban_data:
                return "从豆瓣获取信息失败请检查链接或Cookie。"
        if imdb_id and not tmdb_data:
            if not tmdb_id:
                tmdb_id = await self._find_tmdb_id(imdb_id)
            if tmdb_id:
                tmdb_data = await self.tmdb.get_movie_details(tmdb_id) or {}

        # Never render (and cache) an empty description.
        if not (tmdb_data or douban_data):
            return "未能获取到影片信息,请检查链接是否有效。"

        # --- Step 5: merge the data into a MovieInfo object ---
        movie = MovieInfo()
        if tmdb_data:
            self._merge_tmdb(movie, tmdb_data)
        if douban_data:
            self._merge_douban(movie, douban_data, douban_id)

        # Make sure the IMDb link exists, even when TMDB returned nothing
        # and the ID is known only from the URL or from Douban.
        movie.imdb_id = movie.imdb_id or imdb_id
        if movie.imdb_id:
            movie.imdb_link = f"https://www.imdb.com/title/{movie.imdb_id}/"

        # Make sure both titles have a fallback value.
        if not movie.original_title:
            movie.original_title = movie.chinese_title
        if not movie.chinese_title:
            movie.chinese_title = movie.original_title

        # --- Step 6: post-processing (image re-hosting, rendering, caching) ---
        if self.uploader and movie.poster_url:
            print(f"开始转存图片: {movie.poster_url}")
            uploaded_url = await self.uploader.upload(movie.poster_url)
            # Assumes upload() returns a falsy value on failure — TODO
            # confirm; in that case the original poster URL is kept.
            if uploaded_url:
                movie.poster_url = uploaded_url
                print(f"转存成功新URL: {movie.poster_url}")

        template = self.jinja_env.get_template('description.jinja2')
        final_text = template.render(movie.dict())

        # Write the rendered text to the cache.
        await self.cache.set(cache_key, final_text, ttl=self.settings.redis.cache_ttl_seconds)

        print("\n" + "="*40 + " FINAL RENDERED OUTPUT " + "="*40)
        print(final_text)
        print("="*103 + "\n")
        return final_text