Files
PTGen/pt_gen/core/orchestrator.py
DengDai 644b5aaaf8 init
2025-12-08 14:47:24 +08:00

178 lines
8.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
from typing import Optional
from jinja2 import Environment, FileSystemLoader
from pt_gen.core.config import Settings
from pt_gen.models.movie import MovieInfo
from pt_gen.services.cache import RedisCache
from pt_gen.services.douban import DoubanScraper
from pt_gen.services.tmdb import TMDBClient
from pt_gen.services.uploader import ImageUploader
class InfoOrchestrator:
def __init__(self, settings: Settings):
self.settings = settings
self.tmdb = TMDBClient(api_key=self.settings.tmdb.api_key)
self.douban = DoubanScraper(cookie=self.settings.douban.cookie)
self.cache = RedisCache(
host=self.settings.redis.host,
port=self.settings.redis.port,
db=self.settings.redis.db
)
if self.settings.uploader.enable:
self.uploader = ImageUploader(
api_url=self.settings.uploader.api_url,
api_key=self.settings.uploader.api_key
)
else:
self.uploader = None
self.jinja_env = Environment(loader=FileSystemLoader('templates'))
def _extract_douban_id(self, url: str) -> Optional[str]:
match = re.search(r'douban\.com/subject/(\d+)', url)
return match.group(1) if match else None
def _extract_imdb_id(self, url: str) -> Optional[str]:
match = re.search(r'imdb\.com/title/(tt\d+)', url)
return match.group(1) if match else None
def _extract_tmdb_id(self, url: str) -> Optional[str]:
match = re.search(r'themoviedb\.org/(?:movie|tv)/(\d+)', url)
return match.group(1) if match else None
async def generate_info(self, url: str) -> Optional[str]:
# --- 步骤 1: 识别URL类型并提取初始ID ---
douban_id = self._extract_douban_id(url)
imdb_id = self._extract_imdb_id(url)
tmdb_id = self._extract_tmdb_id(url)
if not (douban_id or imdb_id or tmdb_id):
return "无效或不支持的链接。请输入有效的豆瓣、IMDb或TMDB链接。"
# --- 步骤 2: ID 补全 (以 IMDb ID 为核心) ---
# 如果从 TMDB 链接开始,先找到 IMDb ID
if tmdb_id and not imdb_id:
details = await self.tmdb.get_movie_details(tmdb_id)
if details and details.get('imdb_id'):
imdb_id = details['imdb_id']
# 如果从豆瓣链接开始,爬取豆瓣页面来找到 IMDb ID
if douban_id and not imdb_id:
# 临时爬一次,只为获取 IMDb ID
temp_douban_data = await self.douban.scrape_movie_info(douban_id)
if temp_douban_data.get('imdb_id'):
imdb_id = temp_douban_data['imdb_id']
# --- 步骤 3: 查缓存 (使用唯一的 IMDb ID 作为 key) ---
if imdb_id:
cache_key = f"movieinfo:{imdb_id}"
cached_result = await self.cache.get(cache_key)
if cached_result:
print(f"命中缓存: {cache_key}")
return cached_result
else:
# 如果没有IMDb ID (例如一个没有IMDB链接的冷门豆瓣电影), 则用豆瓣ID做缓存key
cache_key = f"movieinfo:douban:{douban_id}"
# --- 步骤 4: 分别获取数据 ---
douban_data = {}
tmdb_data = {}
# 如果有豆瓣ID就从豆瓣获取数据
if douban_id:
douban_data = await self.douban.scrape_movie_info(douban_id)
if not douban_data:
return "从豆瓣获取信息失败请检查链接或Cookie。"
# 如果有IMDb ID就从TMDB获取数据
if imdb_id:
tmdb_find_data = await self.tmdb.find_by_imdb_id(imdb_id)
if tmdb_find_data and tmdb_find_data.get('movie_results'):
# 确保 tmdb_id 被正确设置
tmdb_id = tmdb_find_data['movie_results'][0]['id']
tmdb_data = await self.tmdb.get_movie_details(tmdb_id)
if imdb_id and not tmdb_id:
tmdb_find_data = await self.tmdb.find_by_imdb_id(imdb_id)
if tmdb_find_data and tmdb_find_data.get('movie_results'):
tmdb_id = tmdb_find_data['movie_results'][0]['id']
if tmdb_id:
# 现在 get_movie_details 返回的是干净的数据
tmdb_data = await self.tmdb.get_movie_details(tmdb_id)
# --- 步骤 5: 智能合并数据到 MovieInfo 对象 ---
movie = MovieInfo()
# 优先使用TMDB数据因为通常更规范
if tmdb_data:
movie.tmdb_id = tmdb_data.get('id')
movie.imdb_id = tmdb_data.get('imdb_id')
movie.original_title = tmdb_data.get('original_title')
movie.chinese_title = tmdb_data.get('title')
movie.aka_titles = tmdb_data.get('aka_titles', [])
movie.year = tmdb_data.get('release_date', '')[:4]
movie.release_date = tmdb_data.get('release_date')
movie.runtime = tmdb_data.get('runtime')
movie.spoken_languages = tmdb_data.get('spoken_languages', [])
movie.tagline = tmdb_data.get('tagline')
movie.imdb_rating = f"{tmdb_data.get('vote_average', 0):.1f}/10"
movie.directors = tmdb_data.get('directors', [])
movie.writers = tmdb_data.get('writers', [])
movie.actors = tmdb_data.get('actors', [])
movie.genres = tmdb_data.get('genres', [])
movie.countries = tmdb_data.get('countries', [])
# TMDB的简介可能是空的先用着后面会被豆瓣覆盖
movie.synopsis = tmdb_data.get('overview', '')
if tmdb_data.get('poster_path'):
movie.poster_url = f"https://image.tmdb.org/t/p/original{tmdb_data['poster_path']}"
# 使用豆瓣数据进行补充或覆盖
if douban_data:
movie.douban_id = douban_id
movie.douban_link = f"https://movie.douban.com/subject/{douban_id}/"
# 智能填充如果TMDB没值才用豆瓣的
movie.chinese_title = movie.chinese_title or douban_data.get('chinese_title')
movie.year = movie.year or douban_data.get('year')
movie.runtime = movie.runtime or douban_data.get('runtime')
movie.spoken_languages = movie.spoken_languages or douban_data.get('spoken_languages', [])
movie.directors = movie.directors or douban_data.get('directors', [])
movie.writers = movie.writers or douban_data.get('writers', [])
movie.actors = movie.actors or douban_data.get('actors', [])
movie.genres = movie.genres or douban_data.get('genres', [])
movie.countries = movie.countries or douban_data.get('countries', [])
movie.poster_url = movie.poster_url or douban_data.get('poster_url')
# 强制覆盖:豆瓣的简介、评分和获奖通常更符合中文区需求
if douban_data.get('synopsis'):
movie.synopsis = douban_data.get('synopsis')
movie.douban_rating = douban_data.get('douban_rating', 'N/A')
movie.awards = douban_data.get('awards', [])
# 确保IMDb链接存在
if movie.imdb_id:
movie.imdb_link = f"https://www.imdb.com/title/{movie.imdb_id}/"
# 确保标题有备用值
if not movie.original_title:
movie.original_title = movie.chinese_title
if not movie.chinese_title:
movie.chinese_title = movie.original_title
# --- 步骤 6: 后续处理 (图片转存、渲染、缓存) ---
if self.uploader and movie.poster_url:
print(f"开始转存图片: {movie.poster_url}")
movie.poster_url = await self.uploader.upload(movie.poster_url)
print(f"转存成功新URL: {movie.poster_url}")
template = self.jinja_env.get_template('description.jinja2')
final_text = template.render(movie.dict())
# 写入缓存
await self.cache.set(cache_key, final_text, ttl=self.settings.redis.cache_ttl_seconds)
print("\n" + "="*40 + " FINAL RENDERED OUTPUT " + "="*40)
print(final_text)
print("="*103 + "\n")
return final_text