init
This commit is contained in:
177
pt_gen/core/orchestrator.py
Normal file
177
pt_gen/core/orchestrator.py
Normal file
@@ -0,0 +1,177 @@
|
||||
import re
|
||||
from typing import Optional
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
from pt_gen.core.config import Settings
|
||||
from pt_gen.models.movie import MovieInfo
|
||||
from pt_gen.services.cache import RedisCache
|
||||
from pt_gen.services.douban import DoubanScraper
|
||||
from pt_gen.services.tmdb import TMDBClient
|
||||
from pt_gen.services.uploader import ImageUploader
|
||||
|
||||
class InfoOrchestrator:
    """Collect movie metadata from Douban and TMDB and render a description.

    Given a Douban, IMDb or TMDB link, the orchestrator resolves the IDs it
    can, consults the cache, fetches data from both sources, merges it into a
    ``MovieInfo`` object, optionally re-hosts the poster image and renders the
    ``description.jinja2`` template.
    """

    def __init__(self, settings: "Settings"):
        """Create the service clients configured by ``settings``.

        Args:
            settings: Application settings; the ``tmdb``, ``douban``,
                ``redis`` and ``uploader`` sections are read here.
        """
        self.settings = settings
        self.tmdb = TMDBClient(api_key=self.settings.tmdb.api_key)
        self.douban = DoubanScraper(cookie=self.settings.douban.cookie)
        self.cache = RedisCache(
            host=self.settings.redis.host,
            port=self.settings.redis.port,
            db=self.settings.redis.db,
        )
        # The uploader is optional; ``None`` disables poster re-hosting.
        if self.settings.uploader.enable:
            self.uploader = ImageUploader(
                api_url=self.settings.uploader.api_url,
                api_key=self.settings.uploader.api_key,
            )
        else:
            self.uploader = None

        # A relative search path is resolved against the current working
        # directory by jinja2's FileSystemLoader.
        self.jinja_env = Environment(loader=FileSystemLoader('templates'))

    def _extract_douban_id(self, url: str) -> Optional[str]:
        """Return the numeric Douban subject ID found in ``url``, or None."""
        match = re.search(r'douban\.com/subject/(\d+)', url)
        return match.group(1) if match else None

    def _extract_imdb_id(self, url: str) -> Optional[str]:
        """Return the IMDb title ID (``tt...``) found in ``url``, or None."""
        match = re.search(r'imdb\.com/title/(tt\d+)', url)
        return match.group(1) if match else None

    def _extract_tmdb_id(self, url: str) -> Optional[str]:
        """Return the TMDB ID from a ``/movie/`` or ``/tv/`` link, or None."""
        match = re.search(r'themoviedb\.org/(?:movie|tv)/(\d+)', url)
        return match.group(1) if match else None

    @staticmethod
    def _build_cache_key(imdb_id, douban_id, tmdb_id) -> str:
        """Return the cache key for a title, preferring the IMDb ID.

        Falls back to the Douban ID (e.g. an obscure Douban entry without an
        IMDb link) and finally to the TMDB ID, so that titles lacking an IMDb
        ID never share one key.
        """
        if imdb_id:
            return f"movieinfo:{imdb_id}"
        if douban_id:
            return f"movieinfo:douban:{douban_id}"
        return f"movieinfo:tmdb:{tmdb_id}"

    @staticmethod
    def _apply_tmdb_data(movie: "MovieInfo", tmdb_data: dict) -> None:
        """Copy the fields of a TMDB details dict onto ``movie``."""
        # ``or`` guards against keys that are present but hold None.
        release_date = tmdb_data.get('release_date') or ''
        vote_average = tmdb_data.get('vote_average') or 0

        movie.tmdb_id = tmdb_data.get('id')
        movie.imdb_id = tmdb_data.get('imdb_id')
        movie.original_title = tmdb_data.get('original_title')
        movie.chinese_title = tmdb_data.get('title')
        movie.aka_titles = tmdb_data.get('aka_titles', [])
        movie.year = release_date[:4]
        movie.release_date = tmdb_data.get('release_date')
        movie.runtime = tmdb_data.get('runtime')
        movie.spoken_languages = tmdb_data.get('spoken_languages', [])
        movie.tagline = tmdb_data.get('tagline')
        # NOTE(review): the "IMDb rating" is filled from TMDB's vote_average.
        movie.imdb_rating = f"{vote_average:.1f}/10"
        movie.directors = tmdb_data.get('directors', [])
        movie.writers = tmdb_data.get('writers', [])
        movie.actors = tmdb_data.get('actors', [])
        movie.genres = tmdb_data.get('genres', [])
        movie.countries = tmdb_data.get('countries', [])
        # The TMDB overview may be empty; a Douban synopsis replaces it later.
        movie.synopsis = tmdb_data.get('overview', '')
        if tmdb_data.get('poster_path'):
            movie.poster_url = f"https://image.tmdb.org/t/p/original{tmdb_data['poster_path']}"

    @staticmethod
    def _apply_douban_data(movie: "MovieInfo", douban_id, douban_data: dict) -> None:
        """Fill gaps in ``movie`` from Douban and override selected fields."""
        movie.douban_id = douban_id
        movie.douban_link = f"https://movie.douban.com/subject/{douban_id}/"

        # Gap filling: the Douban value is used only where TMDB gave nothing.
        movie.chinese_title = movie.chinese_title or douban_data.get('chinese_title')
        movie.year = movie.year or douban_data.get('year')
        movie.runtime = movie.runtime or douban_data.get('runtime')
        movie.spoken_languages = movie.spoken_languages or douban_data.get('spoken_languages', [])
        movie.directors = movie.directors or douban_data.get('directors', [])
        movie.writers = movie.writers or douban_data.get('writers', [])
        movie.actors = movie.actors or douban_data.get('actors', [])
        movie.genres = movie.genres or douban_data.get('genres', [])
        movie.countries = movie.countries or douban_data.get('countries', [])
        movie.poster_url = movie.poster_url or douban_data.get('poster_url')

        # Forced override: Douban's synopsis, rating and awards usually suit
        # Chinese-language readers better.
        if douban_data.get('synopsis'):
            movie.synopsis = douban_data.get('synopsis')
        movie.douban_rating = douban_data.get('douban_rating', 'N/A')
        movie.awards = douban_data.get('awards', [])

    async def generate_info(self, url: str) -> Optional[str]:
        """Build the rendered description text for the title behind ``url``.

        Args:
            url: A Douban, IMDb or TMDB link.

        Returns:
            The rendered (or cached) description. When the link is not
            recognised or the Douban scrape yields nothing, a user-facing
            error message is returned instead.
        """
        # --- Step 1: detect the link type and extract the initial IDs ---
        douban_id = self._extract_douban_id(url)
        imdb_id = self._extract_imdb_id(url)
        tmdb_id = self._extract_tmdb_id(url)

        if not (douban_id or imdb_id or tmdb_id):
            return "无效或不支持的链接。请输入有效的豆瓣、IMDb或TMDB链接。"

        douban_data = {}
        tmdb_data = {}

        # --- Step 2: complete the IDs (the IMDb ID is the pivot) ---
        # Whatever is fetched here is kept and reused in step 4 so that each
        # source is queried at most once per successful request.
        # NOTE(review): /tv/ links are accepted by _extract_tmdb_id but are
        # passed to get_movie_details like movie IDs - confirm this is intended.
        if tmdb_id and not imdb_id:
            tmdb_data = (await self.tmdb.get_movie_details(tmdb_id)) or {}
            imdb_id = tmdb_data.get('imdb_id') or None

        if douban_id and not imdb_id:
            douban_data = (await self.douban.scrape_movie_info(douban_id)) or {}
            imdb_id = douban_data.get('imdb_id') or None

        # --- Step 3: cache lookup, for every kind of key ---
        cache_key = self._build_cache_key(imdb_id, douban_id, tmdb_id)
        cached_result = await self.cache.get(cache_key)
        if cached_result:
            print(f"命中缓存: {cache_key}")
            return cached_result

        # --- Step 4: fetch whatever is still missing ---
        if douban_id:
            if not douban_data:
                douban_data = await self.douban.scrape_movie_info(douban_id)
            if not douban_data:
                return "从豆瓣获取信息失败,请检查链接或Cookie。"

        if imdb_id and not tmdb_id:
            tmdb_find_data = await self.tmdb.find_by_imdb_id(imdb_id)
            if tmdb_find_data and tmdb_find_data.get('movie_results'):
                tmdb_id = tmdb_find_data['movie_results'][0]['id']

        if tmdb_id and not tmdb_data:
            tmdb_data = (await self.tmdb.get_movie_details(tmdb_id)) or {}

        # --- Step 5: merge both sources into a MovieInfo object ---
        movie = MovieInfo()
        # TMDB goes first because its data is usually more normalised.
        if tmdb_data:
            self._apply_tmdb_data(movie, tmdb_data)
        if douban_data:
            self._apply_douban_data(movie, douban_id, douban_data)

        # Keep an IMDb ID learned from the URL or Douban even when TMDB
        # returned nothing, so the IMDb link is still produced.
        movie.imdb_id = movie.imdb_id or imdb_id
        if movie.imdb_id:
            movie.imdb_link = f"https://www.imdb.com/title/{movie.imdb_id}/"

        # Make sure each title has a fallback value.
        if not movie.original_title:
            movie.original_title = movie.chinese_title
        if not movie.chinese_title:
            movie.chinese_title = movie.original_title

        # --- Step 6: post-processing (image re-hosting, rendering, caching) ---
        if self.uploader and movie.poster_url:
            print(f"开始转存图片: {movie.poster_url}")
            movie.poster_url = await self.uploader.upload(movie.poster_url)
            print(f"转存成功,新URL: {movie.poster_url}")

        template = self.jinja_env.get_template('description.jinja2')
        final_text = template.render(movie.dict())

        # Store the rendered text in the cache.
        await self.cache.set(cache_key, final_text, ttl=self.settings.redis.cache_ttl_seconds)

        print("\n" + "="*40 + " FINAL RENDERED OUTPUT " + "="*40)
        print(final_text)
        print("="*103 + "\n")

        return final_text
|
||||
Reference in New Issue
Block a user