"""HTTP API that scrapes AnimePahe / AniWatchTV with a shared Playwright browser.

Metadata is enriched from the AniList GraphQL API and the Jikan (MyAnimeList)
REST API. All browser work goes through one persistent Chromium context owned
by the module-level ``pahe`` instance.
"""

import asyncio
import json
import os
import re
from contextlib import asynccontextmanager
from typing import Optional
from urllib.parse import quote

import httpx
from fastapi import FastAPI, Query, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse
from playwright.async_api import BrowserContext, async_playwright

BASE_URL = "https://animepahe.si"
ANIWATCHTV_BASE = "https://aniwatchtv.to"
ANILIST_API = "https://graphql.anilist.co"
JIKAN_API = "https://api.jikan.moe/v4"
IS_HEADLESS = os.environ.get("HEADLESS", "true").lower() == "true"

# In-memory caches (unbounded; they live for the lifetime of the process).
_info_cache: dict = {}
_mal_synopsis_cache: dict = {}


class AnimePahe:
    """Scraper facade around a single persistent Playwright browser context."""

    def __init__(self):
        self.playwright = None
        self.context: Optional[BrowserContext] = None
        # Substrings matched against the lower-cased request URL.
        self.ad_domains = [
            "doubleclick.net",
            "adservice.google",
            "popads.net",
            "propellerads",
            "exoclick",
            "bebi.com",
        ]

    async def start(self):
        """Launch the persistent Chromium context and install the request filter."""
        self.playwright = await async_playwright().start()
        self.context = await self.playwright.chromium.launch_persistent_context(
            user_data_dir="./browser_data",
            headless=IS_HEADLESS,
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122 Safari/537.36",
            args=[
                "--disable-blink-features=AutomationControlled",
                "--no-sandbox",
            ],
        )
        await self.context.route("**/*", self._intercept_assets)

    async def stop(self):
        """Close the browser context and stop Playwright, if they were started."""
        if self.context:
            await self.context.close()
        if self.playwright:
            await self.playwright.stop()

    async def _intercept_assets(self, route):
        """Route handler: abort ad requests and static image/font assets.

        Requests whose URL contains ``aniwatchtv.to`` or ``kwik`` are always
        allowed so players/posters load correctly.
        """
        url = route.request.url.lower()

        # Allow requests from aniwatchtv & kwik (video host).
        if "aniwatchtv.to" in url or "kwik" in url:
            await route.continue_()
            return

        is_ad = any(ad in url for ad in self.ad_domains)
        is_static_asset = url.endswith(
            (".png", ".jpg", ".jpeg", ".webp", ".woff", ".gif")
        )
        if is_ad or is_static_asset:
            await route.abort()
        else:
            await route.continue_()

    async def _fetch_json(self, url: str):
        """Open ``url`` in a browser page and parse the body text as JSON.

        Returns:
            The decoded JSON value, or ``None`` if navigation or parsing fails.
        """
        page = await self.context.new_page()
        try:
            await page.goto(url, wait_until="domcontentloaded")
            txt = await page.evaluate("document.body.innerText")
            return json.loads(txt)
        except Exception as e:
            # Best-effort by design; ``Exception`` (not a bare except) so task
            # cancellation still propagates.
            print(f"[_fetch_json] failed for {url}: {e}")
            return None
        finally:
            await page.close()

    def _generate_mp4(
        self, m3u8_url: Optional[str], anime_name: str, episode: str, res: str
    ) -> Optional[str]:
        """Derive a direct ``/mp4/<token>`` download URL from a stream playlist URL.

        Args:
            m3u8_url: Playlist URL, e.g.
                ``https://na-02.kwik.cx/stream/abc123def/index.m3u8``.
            anime_name: Title used to build the download file name.
            episode: Episode label inserted after ``_EP``.
            res: Resolution label inserted before ``P.mp4``.

        Returns:
            The download URL, or ``None`` when ``m3u8_url`` is empty or does
            not contain a ``/stream/<token>/`` segment.
        """
        if not m3u8_url:
            return None

        match = re.search(r"(https?://[^/]+)/stream/([^/]+)/", m3u8_url)
        if not match:
            return None

        clean_name = re.sub(r"[^\w\s]", "", anime_name).strip().replace(" ", "_")
        if not clean_name:
            clean_name = "Anime"
        filename = f"{clean_name}_EP{episode}_{res}P.mp4"

        domain = match.group(1)  # e.g. https://na-02.kwik.cx
        token = match.group(2)  # e.g. abc123def
        return f"{domain}/mp4/{token}?file={filename}"

    async def _scrape_play_meta(self, page) -> tuple:
        """Read the anime title and episode number from an open play page.

        Returns:
            ``(title, episode)``; falls back to ``"Unknown"`` / ``"00"`` when
            the page yields nothing.
        """
        meta = await page.evaluate("""() => {
            const titleEl = document.querySelector('.theatre-info h1 a, .theatre-info h2 a, .anime-title, h1, h2');
            let title = titleEl ? titleEl.innerText.trim() : '';
            let episode = '';
            const t = document.title || '';
            // Match exactly: "Anime Name - 01 - AnimePahe" OR "Anime Name - Episode 01 - AnimePahe"
            const m = t.match(/^(.+?)\\s*[-\\u2013]\\s*(?:Episode\\s*)?(\\d+(?:\\.\\d+)?)/i);
            if (m) {
                if (!title || title.length < 2) title = m[1].trim();
                if (!episode) episode = m[2].trim();
            }
            return { title, episode }
        }""")
        title = (meta.get("title") or "").strip() or "Unknown"
        episode = (meta.get("episode") or "").strip() or "00"
        return title, episode

    # ---------------- SCRAPE IDs ONLY ----------------
    async def _scrape_ids(self, session: str) -> dict:
        """Collect external database IDs linked from an AnimePahe anime page.

        Returns:
            A dict that always contains ``"animepahe"`` and may contain
            ``"mal"``, ``"anilist"``, ``"anidb"``, ``"ann"``, ``"animePlanet"``.
            On any error only ``{"animepahe": session}`` is returned.
        """
        page = await self.context.new_page()
        try:
            await page.goto(
                f"{BASE_URL}/anime/{session}",
                wait_until="domcontentloaded",
                timeout=30000,
            )
            try:
                await page.wait_for_selector(
                    "div.anime-info, div.anime-summary, aside, main", timeout=10000
                )
            except Exception:
                # Selector wait is only a hint; scrape whatever is there.
                pass
            await asyncio.sleep(2)

            ids = await page.evaluate("""() => {
                let ids = {}
                document.querySelectorAll("a[href]").forEach(a => {
                    const url = a.href || ""
                    if (url.includes("myanimelist.net/anime"))
                        ids["mal"] = url.split("/").filter(Boolean).pop()
                    if (url.includes("anilist.co/anime"))
                        ids["anilist"] = url.split("/").filter(Boolean).pop()
                    if (url.includes("anidb.net"))
                        ids["anidb"] = url.split("/").filter(Boolean).pop()
                    if (url.includes("animenewsnetwork.com")) {
                        const m = url.match(/id=(\\d+)/)
                        if (m) ids["ann"] = m[1]
                    }
                    if (url.includes("anime-planet.com/anime"))
                        ids["animePlanet"] = url.split("/").filter(Boolean).pop()
                })
                return ids
            }""")
            ids["animepahe"] = session
            return ids
        except Exception as e:
            print(f"[scrape_ids] ERROR: {e}")
            return {"animepahe": session}
        finally:
            await page.close()

    # ---------------- MAL SYNOPSIS ----------------
    async def _fetch_mal_synopsis(self, mal_id: str) -> Optional[str]:
        """Fetch the synopsis for ``mal_id`` from Jikan, with in-memory caching.

        Only successful responses are cached, so a transient failure does not
        permanently hide the synopsis for that ID.

        Returns:
            The synopsis string (may be ``None`` if Jikan has none), or
            ``None`` if the request fails.
        """
        if mal_id in _mal_synopsis_cache:
            return _mal_synopsis_cache[mal_id]
        try:
            async with httpx.AsyncClient(timeout=10) as client:
                resp = await client.get(
                    f"{JIKAN_API}/anime/{mal_id}",
                    headers={"Accept": "application/json"},
                )
                resp.raise_for_status()
                synopsis = (resp.json().get("data") or {}).get("synopsis")
        except Exception as e:
            print(f"[mal_synopsis] failed for mal_id={mal_id}: {e}")
            return None
        _mal_synopsis_cache[mal_id] = synopsis
        return synopsis

    # ---------------- SHARED RESOLVE HELPERS ----------------
    async def _collect_buttons(self, page) -> list:
        """Describe every ``#resolutionMenu button`` on an open play page.

        Returns:
            A list of dicts with keys ``embed``, ``res``, ``fansub``,
            ``audio`` (``"sub"``/``"dub"``) and ``audio_lang``. Resolution
            defaults to 720 when the button text has no digits.
        """
        buttons = await page.locator("#resolutionMenu button").all()
        res_data = []
        for btn in buttons:
            text = (await btn.inner_text()).strip()
            res_match = re.search(r"(\d+)", text)
            audio_lang = (await btn.get_attribute("data-audio") or "jpn").lower()
            audio_type = "dub" if audio_lang == "eng" else "sub"
            res_data.append(
                {
                    "embed": await btn.get_attribute("data-src"),
                    "res": int(res_match.group(1)) if res_match else 720,
                    "fansub": text.split("·")[0].strip() if "·" in text else "Unknown",
                    "audio": audio_type,
                    "audio_lang": audio_lang,
                }
            )
        return res_data

    async def _embed_to_m3u8(self, embed_url: str) -> Optional[str]:
        """Open an embed page and capture the first requested ``.m3u8`` URL.

        The page's videos are muted and played, and buttons clicked, up to six
        times; each round waits up to 1.5 s for the playlist request, followed
        by one final 5 s wait.

        Returns:
            The playlist URL, or ``None`` if none was seen or an error occurred.
        """
        p = await self.context.new_page()
        m3u8 = None
        found = asyncio.Event()

        def capture(req):
            nonlocal m3u8
            if ".m3u8" in req.url and not found.is_set():
                m3u8 = req.url
                found.set()

        async def wait_found(timeout: float) -> None:
            # Returns as soon as the playlist is captured instead of always
            # sleeping for the full interval.
            try:
                await asyncio.wait_for(found.wait(), timeout=timeout)
            except asyncio.TimeoutError:
                pass

        p.on("request", capture)
        try:
            # Set the exact referer Kwik expects to prevent token rejections.
            await p.set_extra_http_headers({"Referer": f"{BASE_URL}/"})
            await p.goto(embed_url, wait_until="domcontentloaded", timeout=15000)

            # Click loop: muting allows browsers to bypass autoplay restrictions.
            for _ in range(6):
                if found.is_set():
                    break
                await p.evaluate("""() => {
                    document.querySelectorAll('video').forEach(v => {
                        v.muted = true;
                        const p = v.play();
                        if (p !== undefined) p.catch(() => {});
                    });
                    document.querySelectorAll('button, .vjs-big-play-button').forEach(b => {
                        try { b.click() } catch(e) {}
                    });
                }""")
                await wait_found(1.5)

            await wait_found(5.0)
        except Exception as e:
            print(f"[_embed_to_m3u8] ERROR: {e}")
        finally:
            await p.close()
        return m3u8

    # ---------------- ANILIST ----------------
    async def _fetch_anilist(self, anilist_id: str) -> dict:
        """Fetch and flatten AniList metadata for ``anilist_id``.

        The synopsis prefers the MyAnimeList text (via Jikan) and falls back
        to AniList's description.

        Returns:
            A flattened metadata dict, or ``{"error": ...}`` when the request
            fails or AniList returns no media.
        """
        query = """
        query ($id: Int) {
          Media(id: $id, type: ANIME) {
            id
            idMal
            title { romaji english native }
            synonyms
            description(asHtml: false)
            format
            status
            episodes
            duration
            source
            countryOfOrigin
            isAdult
            startDate { year month day }
            endDate { year month day }
            season
            seasonYear
            averageScore
            meanScore
            popularity
            favourites
            trending
            genres
            coverImage { extraLarge large medium color }
            bannerImage
            trailer { id site }
            studios(isMain: true) { nodes { name siteUrl } }
            relations {
              edges {
                relationType(version: 2)
                node {
                  id
                  idMal
                  title { romaji english }
                  format
                  status
                  episodes
                  averageScore
                  coverImage { medium }
                  siteUrl
                }
              }
            }
            recommendations(perPage: 20, sort: RATING_DESC) {
              nodes {
                rating
                mediaRecommendation {
                  id
                  idMal
                  title { romaji english }
                  format
                  status
                  episodes
                  averageScore
                  coverImage { medium }
                  siteUrl
                }
              }
            }
            externalLinks { site url type }
            nextAiringEpisode { airingAt episode }
          }
        }
        """
        try:
            async with httpx.AsyncClient(timeout=15) as client:
                resp = await client.post(
                    ANILIST_API,
                    json={"query": query, "variables": {"id": int(anilist_id)}},
                    headers={
                        "Content-Type": "application/json",
                        "Accept": "application/json",
                    },
                )
                resp.raise_for_status()
                result = resp.json()
        except Exception as e:
            print(f"[anilist] failed for id={anilist_id}: {e}")
            return {"error": f"AniList fetch failed: {str(e)}"}

        # ``or {}`` / ``or []`` rather than ``.get(key, {})``: a field can be
        # present with an explicit null, which a default does not cover.
        media = (result.get("data") or {}).get("Media")
        if not media:
            return {"error": "AniList returned no data"}

        mal_id = str(media.get("idMal") or "")
        mal_synopsis = await self._fetch_mal_synopsis(mal_id) if mal_id else None
        synopsis = mal_synopsis or media.get("description")

        def fmt_date(d):
            # Partial dates are allowed: missing month/day parts are dropped.
            if not d or not d.get("year"):
                return None
            parts = [d.get("year"), d.get("month"), d.get("day")]
            return "-".join(str(p).zfill(2) for p in parts if p)

        def best_title(item):
            titles = item.get("title") or {}
            return titles.get("english") or titles.get("romaji")

        def medium_cover(item):
            return (item.get("coverImage") or {}).get("medium")

        trailer = None
        if media.get("trailer"):
            t = media["trailer"]
            if t.get("site") == "youtube":
                trailer = f"https://www.youtube.com/watch?v={t['id']}"
            elif t.get("site") == "dailymotion":
                trailer = f"https://www.dailymotion.com/video/{t['id']}"

        # ---------- Relations from AniList ----------
        relations: dict[str, list] = {}
        for edge in (media.get("relations") or {}).get("edges") or []:
            node = edge.get("node") or {}
            if not node:
                continue
            relation_type = edge.get("relationType", "OTHER")
            entry = {
                "id": node.get("id"),
                "mal_id": node.get("idMal"),
                "title": best_title(node),
                "format": node.get("format"),
                "status": node.get("status"),
                "episodes": node.get("episodes"),
                "score": node.get("averageScore"),
                "image": medium_cover(node),
                "url": node.get("siteUrl"),
                "relation_type": relation_type,
            }
            relations.setdefault(relation_type, []).append(entry)

        # ---------- Recommendations ----------
        recommendations = []
        for node in (media.get("recommendations") or {}).get("nodes") or []:
            rec = node.get("mediaRecommendation")
            if not rec:
                continue
            recommendations.append(
                {
                    "id": rec.get("id"),
                    "mal_id": rec.get("idMal"),
                    "title": best_title(rec),
                    "format": rec.get("format"),
                    "status": rec.get("status"),
                    "episodes": rec.get("episodes"),
                    "score": rec.get("averageScore"),
                    "image": medium_cover(rec),
                    "url": rec.get("siteUrl"),
                    "rating": node.get("rating"),
                }
            )

        titles = media.get("title") or {}
        studio_nodes = (media.get("studios") or {}).get("nodes") or []
        external_links = media.get("externalLinks") or []

        return {
            "id": media.get("id"),
            "mal_id": media.get("idMal"),
            "title": {
                "romaji": titles.get("romaji"),
                "english": titles.get("english"),
                "native": titles.get("native"),
            },
            "synonyms": media.get("synonyms", []),
            "synopsis": synopsis,
            "format": media.get("format"),
            "status": media.get("status"),
            "episodes": media.get("episodes"),
            "duration": media.get("duration"),
            "source": media.get("source"),
            "country": media.get("countryOfOrigin"),
            "is_adult": media.get("isAdult"),
            "start_date": fmt_date(media.get("startDate")),
            "end_date": fmt_date(media.get("endDate")),
            "season": media.get("season"),
            "season_year": media.get("seasonYear"),
            "average_score": media.get("averageScore"),
            "mean_score": media.get("meanScore"),
            "popularity": media.get("popularity"),
            "favourites": media.get("favourites"),
            "trending": media.get("trending"),
            "genres": media.get("genres", []),
            "cover_image": media.get("coverImage", {}),
            "banner_image": media.get("bannerImage"),
            "trailer": trailer,
            "studios": [s.get("name") for s in studio_nodes],
            "next_airing": media.get("nextAiringEpisode"),
            "external_links": [
                {
                    "site": link.get("site"),
                    "url": link.get("url"),
                    "type": link.get("type"),
                }
                for link in external_links
            ],
            "relations": relations,
            "recommendations": recommendations,
        }

    # ---------------- SEARCH ----------------
    async def search(self, q: str):
        """Search AnimePahe; returns the ``data`` list, or ``[]`` on failure."""
        # Percent-encode so '&', '#', '+' etc. in the query stay part of ``q``.
        data = await self._fetch_json(f"{BASE_URL}/api?m=search&q={quote(q, safe='')}")
        return data.get("data", []) if data else []

    # ---------------- LATEST ----------------
    async def get_latest(self, p: int = 1):
        """Return page ``p`` of the airing feed as decoded JSON (or ``None``)."""
        return await self._fetch_json(f"{BASE_URL}/api?m=airing&page={p}")

    # ---------------- EPISODES ----------------
    async def get_episodes(self, anime_id: str, p: int = 1, resolve: bool = False):
        """Return one page of episodes for ``anime_id``, newest first.

        Args:
            anime_id: AnimePahe anime session ID.
            p: Page number.
            resolve: When true, every episode is additionally resolved to
                stream URLs and gains ``sub`` / ``dub`` keys. All episodes on
                the page are resolved concurrently.

        Returns:
            The decoded API response, or ``None`` if the fetch failed.
        """
        data = await self._fetch_json(
            f"{BASE_URL}/api?m=release&id={quote(anime_id, safe='')}"
            f"&sort=episode_desc&page={p}"
        )
        if not data or not resolve:
            return data

        episodes = data.get("data", [])

        async def enrich(ep):
            ep_session = ep.get("session")
            if not ep_session:
                return ep
            stream = await self.resolve(anime_id, ep_session)
            ep["sub"] = stream.get("sub")
            ep["dub"] = stream.get("dub")
            return ep

        data["data"] = list(await asyncio.gather(*[enrich(ep) for ep in episodes]))
        return data

    # ---------------- IDS ONLY ----------------
    async def get_ids(self, session: str):
        """Return the external IDs for ``session`` with every known key present."""
        try:
            ids = await self._scrape_ids(session)
            return {
                "animepahe": ids.get("animepahe"),
                "anilist": ids.get("anilist"),
                "mal": ids.get("mal"),
                "anidb": ids.get("anidb"),
                "ann": ids.get("ann"),
                "animePlanet": ids.get("animePlanet"),
            }
        except Exception as e:
            print(f"[get_ids] ERROR: {e}")
            return {"error": f"Failed: {str(e)}"}

    # ---------------- INFO ----------------
    async def get_info(self, session: str):
        """Return AniList metadata for an AnimePahe session, cached by AniList ID.

        Returns:
            The metadata dict with an added ``ids`` key, or a dict with an
            ``error`` key (plus ``ids`` when they were scraped).
        """
        try:
            ids = await self._scrape_ids(session)
            anilist_id = ids.get("anilist")
            if not anilist_id:
                return {
                    "error": "Could not find AniList ID on AnimePahe page",
                    "ids": ids,
                }

            if anilist_id in _info_cache:
                return _info_cache[anilist_id]

            data = await self._fetch_anilist(anilist_id)
            if "error" in data:
                return {"error": data["error"], "ids": ids}

            data["ids"] = {
                "animepahe": ids.get("animepahe"),
                "anilist": anilist_id,
                "mal": ids.get("mal"),
                "anidb": ids.get("anidb"),
                "ann": ids.get("ann"),
                "animePlanet": ids.get("animePlanet"),
            }
            _info_cache[anilist_id] = data
            return data
        except Exception as e:
            print(f"[get_info] ERROR: {e}")
            return {"error": f"Failed: {str(e)}"}

    # ---------------- RESOLVE ----------------
    async def resolve(self, anime_session: str, episode_session: str):
        """Resolve an episode to its best sub and dub stream URLs.

        Only the highest-resolution button of each audio type is resolved.

        Returns:
            A dict with ``anime``, ``episode``, ``anime_name``,
            ``episode_num``, ``sub`` and ``dub`` (each ``None`` when that
            audio type is unavailable), or ``{"error": ...}`` on failure.
        """
        play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
        page = await self.context.new_page()
        try:
            await page.goto(play_url, wait_until="domcontentloaded")
            await page.wait_for_selector(
                "#resolutionMenu button",
                state="attached",
                timeout=15000,
            )
            anime_name, episode_num = await self._scrape_play_meta(page)
            res_data = await self._collect_buttons(page)
            # The play page is no longer needed; free it before opening embeds.
            await page.close()
            page = None

            async def resolve_one(item):
                try:
                    m3u8 = await self._embed_to_m3u8(item["embed"])
                    res_str = str(item["res"])
                    return {
                        "resolution": res_str,
                        "fansub": item["fansub"],
                        "audio": item["audio"],
                        "audio_lang": item["audio_lang"],
                        "url": m3u8,
                        "download": self._generate_mp4(
                            m3u8, anime_name, episode_num, res_str
                        ),
                    }
                except Exception as e:
                    return {
                        "resolution": str(item["res"]),
                        "fansub": item["fansub"],
                        "audio": item["audio"],
                        "audio_lang": item["audio_lang"],
                        "url": None,
                        "download": None,
                        "error": str(e),
                    }

            # Pick the best candidate per audio type, keyed so the gathered
            # results can be matched back without positional bookkeeping.
            best = {}
            for audio in ("sub", "dub"):
                candidates = [r for r in res_data if r["audio"] == audio]
                if candidates:
                    best[audio] = max(candidates, key=lambda x: x["res"])

            results = await asyncio.gather(
                *(resolve_one(item) for item in best.values())
            )
            resolved = dict(zip(best, results))

            return {
                "anime": anime_session,
                "episode": episode_session,
                "anime_name": anime_name,
                "episode_num": episode_num,
                "sub": resolved.get("sub"),
                "dub": resolved.get("dub"),
            }
        except Exception as e:
            return {"error": str(e)}
        finally:
            if page:
                await page.close()

    # ---------------- SEASONS ----------------
    async def get_seasons(self, anime_id: str) -> dict:
        """Scrape the season list shown on an AniWatchTV anime page.

        Returns:
            ``{"id", "total", "seasons"}`` where each season has ``title``,
            ``id``, ``numericId``, ``url`` and ``poster``; an ``error`` key is
            added (with an empty list) on failure.
        """
        url = f"{ANIWATCHTV_BASE}/{anime_id}"
        page = await self.context.new_page()
        try:
            await page.goto(url, wait_until="domcontentloaded", timeout=30000)
            await asyncio.sleep(1)

            for selector in [".os-list", ".seasons-block", "[class*='season']", "main"]:
                try:
                    await page.wait_for_selector(selector, timeout=5000)
                    break
                except Exception:
                    continue

            # Only the module constant is interpolated into the script; the
            # caller-supplied ``anime_id`` must never be embedded in JS source.
            seasons = await page.evaluate(f"""() => {{
                const BASE = "{ANIWATCHTV_BASE}";
                const results = [];
                const seen = new Set();

                const block = (
                    document.querySelector('.os-list') ||
                    document.querySelector('.seasons-block') ||
                    document.querySelector('[class*="os-list"]') ||
                    document.querySelector('[class*="season-list"]')
                );

                const fallbackContainer = (() => {{
                    for (const el of document.querySelectorAll('*')) {{
                        if (/more\\s+seasons?/i.test(el.innerText?.trim() || '')) {{
                            let p = el.parentElement;
                            for (let i = 0; i < 5; i++) {{
                                if (!p) break;
                                if (p.querySelectorAll('a[href]').length > 0) return p;
                                p = p.parentElement;
                            }}
                        }}
                    }}
                    return null;
                }})();

                const container = block || fallbackContainer;
                if (!container) return results;

                for (const a of container.querySelectorAll('a[href]')) {{
                    const href = a.getAttribute('href') || '';
                    const fullUrl = href.startsWith('http') ? href
                        : href.startsWith('/') ? BASE + href
                        : null;
                    if (!fullUrl) continue;

                    const slug = fullUrl.replace(/\\/$/, '').split('/').pop();
                    if (!slug || seen.has(slug)) continue;
                    seen.add(slug);

                    const numericMatch = slug.match(/-(\\d+)$/);
                    const numericId = numericMatch ? numericMatch[1] : null;

                    const titleEl = a.querySelector('span, [class*="title"], [class*="name"]');
                    const title = (titleEl?.innerText?.trim() || a.innerText?.trim() || slug);

                    const posterEl = a.querySelector('.season-poster') || a.closest('li, div')?.querySelector('.season-poster');
                    let poster = null;
                    if (posterEl) {{
                        const bg = posterEl.style.backgroundImage || window.getComputedStyle(posterEl).backgroundImage;
                        // Strip only the quotes around url("..."); slashes are part of the URL.
                        const bg2 = bg.split('url(').pop().split(')')[0].replace(/['"]/g, '').trim();
                        if (bg2 && bg2.startsWith('http')) poster = bg2;
                    }}

                    results.push({{ title, id: slug, numericId, url: fullUrl, poster }});
                }}
                return results;
            }}""")

            return {
                "id": anime_id,
                "total": len(seasons),
                "seasons": seasons,
            }
        except Exception as e:
            print(f"[get_seasons] ERROR: {e}")
            return {"id": anime_id, "total": 0, "seasons": [], "error": str(e)}
        finally:
            await page.close()


pahe = AnimePahe()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the shared browser on startup and always stop it on shutdown."""
    await pahe.start()
    try:
        yield
    finally:
        await pahe.stop()


app = FastAPI(lifespan=lifespan)


@app.get("/", response_class=JSONResponse)
async def root():
    """Health check that also lists the available routes."""
    return {
        "status": "ok",
        "routes": [
            "/search?q=:title",
            "/latest?p=:page",
            "/info/:session",
            "/ids/:session",
            "/episodes/:session?p=:page&resolve=false|true",
            "/resolve/:animeSession/:episodeSession",
            "/seasons/:animeId",
            "/poster?url=:cdnImageUrl",
        ],
    }


@app.get("/search")
async def api_search(q: str):
    """Search AnimePahe by title."""
    return await pahe.search(q)


@app.get("/latest")
async def api_latest(p: int = 1):
    """Return a page of the airing feed."""
    return await pahe.get_latest(p)


@app.get("/info/{session}")
async def api_info(session: str):
    """Return AniList-backed metadata for an AnimePahe session."""
    return await pahe.get_info(session)


@app.get("/ids/{session}")
async def api_ids(session: str):
    """Return external database IDs for an AnimePahe session."""
    return await pahe.get_ids(session)


@app.get("/episodes/{session}")
async def api_episodes(session: str, p: int = 1, resolve: bool = False):
    """Return a page of episodes, optionally resolved to stream URLs."""
    return await pahe.get_episodes(session, p, resolve)


@app.get("/resolve/{anime}/{episode}")
async def api_resolve(anime: str, episode: str):
    """Resolve one episode to its best sub/dub stream URLs."""
    return await pahe.resolve(anime, episode)


@app.get("/seasons/{anime_id:path}")
async def api_seasons(anime_id: str, request: Request):
    """Return the season list, adding a ``posterProxied`` URL to each season."""
    data = await pahe.get_seasons(anime_id)
    base_url = str(request.base_url).rstrip("/")
    for season in data.get("seasons", []):
        poster = season.get("poster")
        # Percent-encode: the poster URL is itself a query-string value here.
        season["posterProxied"] = (
            f"{base_url}/poster?url={quote(poster, safe='')}" if poster else None
        )
    return data


@app.get("/poster")
async def api_poster(url: str = Query(..., description="CDN image URL to proxy")):
    """Fetch ``url`` with an AniWatchTV referer and relay the image bytes.

    Responds with 502 and a plain-text message if the upstream fetch fails.
    """
    # NOTE(security): this proxies any caller-supplied URL (SSRF risk).
    # Consider restricting it to an allow-list of known CDN hosts.
    try:
        async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
            resp = await client.get(
                url,
                headers={
                    "Referer": "https://aniwatchtv.to/",
                    "User-Agent": "Mozilla/5.0",
                },
            )
            resp.raise_for_status()
            content_type = resp.headers.get("content-type", "image/jpeg")
            return Response(content=resp.content, media_type=content_type)
    except Exception as e:
        return Response(
            content=f"Failed to fetch image: {e}",
            status_code=502,
            media_type="text/plain",
        )


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)