import re
from typing import Optional

from jinja2 import Environment, FileSystemLoader

from pt_gen.core.config import Settings
from pt_gen.models.movie import MovieInfo
from pt_gen.services.cache import RedisCache
from pt_gen.services.douban import DoubanScraper
from pt_gen.services.tmdb import TMDBClient
from pt_gen.services.uploader import ImageUploader


class InfoOrchestrator:
    """Turn a Douban / IMDb / TMDB link into a rendered description text.

    The orchestrator resolves the IDs behind the link, looks the result up in
    the cache, fetches data from Douban and TMDB, merges both sources into a
    ``MovieInfo`` object, optionally re-hosts the poster image, renders the
    ``description.jinja2`` template and stores the rendered text in the cache.
    """

    # Patterns used to pull the site-specific ID out of a pasted URL.
    _DOUBAN_ID_RE = re.compile(r'douban\.com/subject/(\d+)')
    _IMDB_ID_RE = re.compile(r'imdb\.com/title/(tt\d+)')
    _TMDB_ID_RE = re.compile(r'themoviedb\.org/(?:movie|tv)/(\d+)')

    def __init__(self, settings: Settings):
        """Build the service clients from *settings*.

        Args:
            settings: Application settings; the ``tmdb``, ``douban``,
                ``redis`` and ``uploader`` sections are read here.
        """
        self.settings = settings
        self.tmdb = TMDBClient(api_key=self.settings.tmdb.api_key)
        self.douban = DoubanScraper(cookie=self.settings.douban.cookie)
        self.cache = RedisCache(
            host=self.settings.redis.host,
            port=self.settings.redis.port,
            db=self.settings.redis.db,
        )
        # The uploader is optional; ``None`` means posters are not re-hosted.
        if self.settings.uploader.enable:
            self.uploader = ImageUploader(
                api_url=self.settings.uploader.api_url,
                api_key=self.settings.uploader.api_key,
            )
        else:
            self.uploader = None
        # NOTE: 'templates' is a relative path, so it is resolved against the
        # process's current working directory.
        self.jinja_env = Environment(loader=FileSystemLoader('templates'))

    def _extract_douban_id(self, url: str) -> Optional[str]:
        """Return the numeric Douban subject ID found in *url*, or ``None``."""
        match = self._DOUBAN_ID_RE.search(url)
        return match.group(1) if match else None

    def _extract_imdb_id(self, url: str) -> Optional[str]:
        """Return the IMDb title ID (``tt`` + digits) found in *url*, or ``None``."""
        match = self._IMDB_ID_RE.search(url)
        return match.group(1) if match else None

    def _extract_tmdb_id(self, url: str) -> Optional[str]:
        """Return the numeric TMDB ID found in *url*, or ``None``.

        NOTE(review): ``/tv/`` links are accepted here, but the ID is later
        passed to ``get_movie_details`` -- confirm that TV IDs are supported.
        """
        match = self._TMDB_ID_RE.search(url)
        return match.group(1) if match else None

    @staticmethod
    def _build_cache_key(imdb_id, douban_id, tmdb_id) -> str:
        """Return the cache key for a title, preferring the IMDb ID.

        The Douban ID is the fallback for titles without an IMDb ID, and the
        TMDB ID is the last resort so that unrelated TMDB-only titles never
        share a key.
        """
        if imdb_id:
            return f"movieinfo:{imdb_id}"
        if douban_id:
            return f"movieinfo:douban:{douban_id}"
        return f"movieinfo:tmdb:{tmdb_id}"

    @staticmethod
    def _apply_tmdb_data(movie: MovieInfo, tmdb_data: dict) -> None:
        """Copy the TMDB fields into *movie* (TMDB is the preferred source)."""
        movie.tmdb_id = tmdb_data.get('id')
        movie.imdb_id = tmdb_data.get('imdb_id')
        movie.original_title = tmdb_data.get('original_title')
        movie.chinese_title = tmdb_data.get('title')
        movie.aka_titles = tmdb_data.get('aka_titles', [])
        # ``release_date`` may be present but None, so guard before slicing.
        movie.year = (tmdb_data.get('release_date') or '')[:4]
        movie.release_date = tmdb_data.get('release_date')
        movie.runtime = tmdb_data.get('runtime')
        movie.spoken_languages = tmdb_data.get('spoken_languages', [])
        movie.tagline = tmdb_data.get('tagline')
        # NOTE(review): the field is named ``imdb_rating`` but is filled from
        # TMDB's ``vote_average``. A None value is formatted as 0.0.
        movie.imdb_rating = f"{(tmdb_data.get('vote_average') or 0):.1f}/10"
        movie.directors = tmdb_data.get('directors', [])
        movie.writers = tmdb_data.get('writers', [])
        movie.actors = tmdb_data.get('actors', [])
        movie.genres = tmdb_data.get('genres', [])
        movie.countries = tmdb_data.get('countries', [])
        # The TMDB overview may be empty; a Douban synopsis overrides it later.
        movie.synopsis = tmdb_data.get('overview', '')
        if tmdb_data.get('poster_path'):
            movie.poster_url = f"https://image.tmdb.org/t/p/original{tmdb_data['poster_path']}"

    @staticmethod
    def _apply_douban_data(movie: MovieInfo, douban_id: str, douban_data: dict) -> None:
        """Fill gaps in *movie* from Douban and override the Douban-preferred fields."""
        movie.douban_id = douban_id
        movie.douban_link = f"https://movie.douban.com/subject/{douban_id}/"

        # Fill only: the Douban value is used when TMDB supplied nothing.
        movie.chinese_title = movie.chinese_title or douban_data.get('chinese_title')
        movie.year = movie.year or douban_data.get('year')
        movie.runtime = movie.runtime or douban_data.get('runtime')
        movie.spoken_languages = movie.spoken_languages or douban_data.get('spoken_languages', [])
        movie.directors = movie.directors or douban_data.get('directors', [])
        movie.writers = movie.writers or douban_data.get('writers', [])
        movie.actors = movie.actors or douban_data.get('actors', [])
        movie.genres = movie.genres or douban_data.get('genres', [])
        movie.countries = movie.countries or douban_data.get('countries', [])
        movie.poster_url = movie.poster_url or douban_data.get('poster_url')

        # Forced override: Douban's synopsis, rating and awards take priority.
        if douban_data.get('synopsis'):
            movie.synopsis = douban_data.get('synopsis')
        movie.douban_rating = douban_data.get('douban_rating', 'N/A')
        movie.awards = douban_data.get('awards', [])

    async def generate_info(self, url: str) -> Optional[str]:
        """Generate the rendered description text for the title behind *url*.

        Args:
            url: A Douban, IMDb or TMDB link.

        Returns:
            The rendered (or cached) description text. When the link is not
            recognised or the Douban scrape yields nothing, a user-facing
            error message string is returned instead.
        """
        # --- Step 1: identify the link type and extract the initial ID ---
        douban_id = self._extract_douban_id(url)
        imdb_id = self._extract_imdb_id(url)
        tmdb_id = self._extract_tmdb_id(url)

        if not (douban_id or imdb_id or tmdb_id):
            return "无效或不支持的链接。请输入有效的豆瓣、IMDb或TMDB链接。"

        # ``None`` means "not scraped yet"; an empty dict means "scraped, no data".
        douban_data = None
        tmdb_data = {}

        # --- Step 2: complete the IDs (the IMDb ID is the canonical one) ---
        # Starting from a TMDB link: the details carry the IMDb ID, and they
        # are kept so they do not have to be fetched again in step 4.
        if tmdb_id and not imdb_id:
            tmdb_data = await self.tmdb.get_movie_details(tmdb_id) or {}
            imdb_id = tmdb_data.get('imdb_id') or imdb_id

        # Starting from a Douban link: the scraped page carries the IMDb ID.
        # The scrape result is kept and reused in step 4.
        if douban_id and not imdb_id:
            douban_data = await self.douban.scrape_movie_info(douban_id) or {}
            imdb_id = douban_data.get('imdb_id') or imdb_id

        # --- Step 3: cache lookup ---
        # The lookup is done for every key flavour, including the Douban/TMDB
        # fallbacks used by titles that have no IMDb ID.
        cache_key = self._build_cache_key(imdb_id, douban_id, tmdb_id)
        cached_result = await self.cache.get(cache_key)
        if cached_result:
            print(f"命中缓存: {cache_key}")
            return cached_result

        # --- Step 4: fetch whatever data is still missing ---
        if douban_id:
            if douban_data is None:
                douban_data = await self.douban.scrape_movie_info(douban_id)
            if not douban_data:
                return "从豆瓣获取信息失败,请检查链接或Cookie。"
        else:
            douban_data = {}

        # Resolve the TMDB ID through the IMDb ID when the link did not give one.
        if imdb_id and not tmdb_id:
            tmdb_find_data = await self.tmdb.find_by_imdb_id(imdb_id)
            movie_results = (tmdb_find_data or {}).get('movie_results') or []
            if movie_results:
                tmdb_id = movie_results[0].get('id')

        if tmdb_id and not tmdb_data:
            tmdb_data = await self.tmdb.get_movie_details(tmdb_id) or {}

        # --- Step 5: merge both sources into a MovieInfo object ---
        movie = MovieInfo()
        if tmdb_data:
            self._apply_tmdb_data(movie, tmdb_data)
        if douban_data:
            self._apply_douban_data(movie, douban_id, douban_data)

        # Keep an IMDb ID resolved from the link or Douban even when TMDB
        # returned nothing, so the IMDb link below is still produced.
        if not movie.imdb_id and imdb_id:
            movie.imdb_id = imdb_id
        if movie.imdb_id:
            movie.imdb_link = f"https://www.imdb.com/title/{movie.imdb_id}/"

        # Make sure each title has a fallback value.
        if not movie.original_title:
            movie.original_title = movie.chinese_title
        if not movie.chinese_title:
            movie.chinese_title = movie.original_title

        # --- Step 6: post-processing (poster re-hosting, rendering, caching) ---
        if self.uploader and movie.poster_url:
            print(f"开始转存图片: {movie.poster_url}")
            movie.poster_url = await self.uploader.upload(movie.poster_url)
            print(f"转存成功,新URL: {movie.poster_url}")

        template = self.jinja_env.get_template('description.jinja2')
        final_text = template.render(movie.dict())

        # Store the rendered text in the cache.
        await self.cache.set(cache_key, final_text, ttl=self.settings.redis.cache_ttl_seconds)

        print("\n" + "="*40 + " FINAL RENDERED OUTPUT " + "="*40)
        print(final_text)
        print("="*103 + "\n")

        return final_text