From 630ad213ad328ca24f2d879581163dc5aabb2f50 Mon Sep 17 00:00:00 2001 From: Aira Catapang Date: Wed, 18 Mar 2026 10:32:59 +0000 Subject: [PATCH] Update main.py --- main.py | 348 ++++++++++++++++++++++++-------------------------------- 1 file changed, 150 insertions(+), 198 deletions(-) diff --git a/main.py b/main.py index 6930ac4..9b54e0c 100644 --- a/main.py +++ b/main.py @@ -3,11 +3,14 @@ import asyncio import re import os import httpx +import urllib.parse from typing import Optional from contextlib import asynccontextmanager from fastapi import FastAPI, Query, Request from fastapi.responses import StreamingResponse, Response, JSONResponse +from fastapi.middleware.cors import CORSMiddleware from playwright.async_api import async_playwright, BrowserContext +from playwright_stealth import Stealth BASE_URL = "https://animepahe.si" ANIWATCHTV_BASE = "https://aniwatchtv.to" @@ -55,7 +58,6 @@ class AnimePahe: async def _intercept_assets(self, route): url = route.request.url.lower() - # Allow requests from aniwatchtv & kwik (video host) so players/posters load correctly if "aniwatchtv.to" in url or "kwik" in url: await route.continue_() return @@ -83,16 +85,28 @@ class AnimePahe: ) -> Optional[str]: if not m3u8_url: return None - # Example: https://na-02.kwik.cx/stream/abc123def/index.m3u8 - match = re.search(r"(https?://[^/]+)/stream/([^/]+)/", m3u8_url) + + # Regex explanation: + # https?://([^.]+) matches the subdomain (e.g., vault-99) + # [^/]*/stream/ ignores the rest of the domain (e.g., .owocdn.top) and matches /stream/ + # (.*?)/[^/]+\.m3u8 matches the entire token path up to the final /uwu.m3u8 + match = re.search(r"https?://([^.]+)[^/]*/stream/(.*?)/[^/]+\.m3u8", m3u8_url) + if match: + subdomain = match.group(1) # e.g., "vault-99" + token_path = match.group( + 2 + ) # e.g., "99/01/d138b9bb16e0a47024fad856caab2fba99d7cbd661ef2662a3572694eaebcf9a" + clean_name = re.sub(r"[^\w\s]", "", anime_name).strip().replace(" ", "_") if not clean_name: clean_name = "Anime" + filename = f"{clean_name}_EP{episode}_{res}P.mp4" - domain = match.group(1) # e.g. https://na-02.kwik.cx - token = match.group(2) # e.g. abc123def - return f"{domain}/mp4/{token}?file={filename}" + + # Reconstruct the string using the kwik.cx domain and /mp4/ endpoint + return f"https://{subdomain}.kwik.cx/mp4/{token_path}?file={filename}" + return None async def _scrape_play_meta(self, page) -> tuple: @@ -102,7 +116,6 @@ class AnimePahe: let episode = ''; const t = document.title || ''; - // Match exactly: "Anime Name - 01 - AnimePahe" OR "Anime Name - Episode 01 - AnimePahe" const m = t.match(/^(.+?)\\s*[-\\u2013]\\s*(?:Episode\\s*)?(\\d+(?:\\.\\d+)?)/i); if (m) { @@ -117,8 +130,6 @@ class AnimePahe: episode = (meta.get("episode") or "").strip() or "00" return title, episode - # ---------------- SCRAPE IDs ONLY ---------------- - async def _scrape_ids(self, session: str) -> dict: page = await self.context.new_page() try: @@ -127,47 +138,35 @@ class AnimePahe: wait_until="domcontentloaded", timeout=30000, ) - try: await page.wait_for_selector( "div.anime-info, div.anime-summary, aside, main", timeout=10000 ) except: pass - await asyncio.sleep(2) - ids = await page.evaluate("""() => { let ids = {} document.querySelectorAll("a[href]").forEach(a => { const url = a.href || "" - if (url.includes("myanimelist.net/anime")) - ids["mal"] = url.split("/").filter(Boolean).pop() - if (url.includes("anilist.co/anime")) - ids["anilist"] = url.split("/").filter(Boolean).pop() - if (url.includes("anidb.net")) - ids["anidb"] = url.split("/").filter(Boolean).pop() + if (url.includes("myanimelist.net/anime")) ids["mal"] = url.split("/").filter(Boolean).pop() + if (url.includes("anilist.co/anime")) ids["anilist"] = url.split("/").filter(Boolean).pop() + if (url.includes("anidb.net")) ids["anidb"] = url.split("/").filter(Boolean).pop() if (url.includes("animenewsnetwork.com")) { const m = url.match(/id=(\\d+)/) if (m) ids["ann"] = m[1] } - if (url.includes("anime-planet.com/anime")) - ids["animePlanet"] = url.split("/").filter(Boolean).pop() + if (url.includes("anime-planet.com/anime")) ids["animePlanet"] = url.split("/").filter(Boolean).pop() }) return ids }""") - ids["animepahe"] = session return ids - except Exception as e: - print(f"[scrape_ids] ERROR: {e}") return {"animepahe": session} finally: await page.close() - # ---------------- MAL SYNOPSIS ---------------- - async def _fetch_mal_synopsis(self, mal_id: str) -> Optional[str]: if mal_id in _mal_synopsis_cache: return _mal_synopsis_cache[mal_id] @@ -181,13 +180,10 @@ class AnimePahe: synopsis = resp.json().get("data", {}).get("synopsis") _mal_synopsis_cache[mal_id] = synopsis return synopsis - except Exception as e: - print(f"[mal_synopsis] failed for mal_id={mal_id}: {e}") + except Exception: _mal_synopsis_cache[mal_id] = None return None - # ---------------- SHARED RESOLVE HELPERS ---------------- - async def _collect_buttons(self, page) -> list: buttons = await page.locator("#resolutionMenu button").all() res_data = [] @@ -220,11 +216,8 @@ class AnimePahe: p.on("request", capture) try: - # Set the exact referer Kwik expects to prevent token rejections await p.set_extra_http_headers({"Referer": "https://animepahe.si/"}) await p.goto(embed_url, wait_until="domcontentloaded", timeout=15000) - - # Click loop: Muting allows browsers to bypass autoplay restrictions safely for _ in range(6): if found.is_set(): break @@ -239,45 +232,30 @@ class AnimePahe: }); }""") await asyncio.sleep(1.5) - try: await asyncio.wait_for(found.wait(), timeout=5.0) except asyncio.TimeoutError: pass - except Exception as e: - print(f"[_embed_to_m3u8] ERROR: {e}") + except Exception: + pass finally: await p.close() return m3u8 - # ---------------- ANILIST ---------------- - async def _fetch_anilist(self, anilist_id: str) -> dict: query = """ query ($id: Int) { Media(id: $id, type: ANIME) { - id - idMal - title { romaji english native } - synonyms - description(asHtml: false) - format status episodes duration source countryOfOrigin isAdult - startDate { year month day } - endDate { year month day } - season seasonYear averageScore meanScore popularity favourites trending genres - coverImage { extraLarge large medium color } - bannerImage - trailer { id site } - studios(isMain: true) { nodes { name siteUrl } } + id idMal title { romaji english native } synonyms description(asHtml: false) format status episodes duration source countryOfOrigin isAdult + startDate { year month day } endDate { year month day } season seasonYear averageScore meanScore popularity favourites trending genres + coverImage { extraLarge large medium color } bannerImage trailer { id site } studios(isMain: true) { nodes { name siteUrl } } relations { edges { relationType(version: 2) node { id idMal title { romaji english } format status episodes averageScore coverImage { medium } siteUrl } } } recommendations(perPage: 20, sort: RATING_DESC) { nodes { rating mediaRecommendation { id idMal title { romaji english } format status episodes averageScore coverImage { medium } siteUrl } } } - externalLinks { site url type } - nextAiringEpisode { airingAt episode } + externalLinks { site url type } nextAiringEpisode { airingAt episode } } } """ - try: async with httpx.AsyncClient(timeout=15) as client: resp = await client.post( @@ -291,7 +269,6 @@ class AnimePahe: resp.raise_for_status() result = resp.json() except Exception as e: - print(f"[anilist] failed for id={anilist_id}: {e}") return {"error": f"AniList fetch failed: {str(e)}"} media = result.get("data", {}).get("Media") @@ -305,8 +282,11 @@ class AnimePahe: def fmt_date(d): if not d or not d.get("year"): return None - parts = [d.get("year"), d.get("month"), d.get("day")] - return "-".join(str(p).zfill(2) for p in parts if p) + return "-".join( + str(p).zfill(2) + for p in [d.get("year"), d.get("month"), d.get("day")] + if p + ) trailer = None if media.get("trailer"): @@ -316,31 +296,30 @@ class AnimePahe: elif t.get("site") == "dailymotion": trailer = f"https://www.dailymotion.com/video/{t['id']}" - # ---------- Relations from AniList ---------- - relations: dict[str, list] = {} + relations = {} for edge in media.get("relations", {}).get("edges", []): node = edge.get("node", {}) if not node: continue - relation_type = edge.get("relationType", "OTHER") - entry = { - "id": node.get("id"), - "mal_id": node.get("idMal"), - "title": ( - node.get("title", {}).get("english") - or node.get("title", {}).get("romaji") - ), - "format": node.get("format"), - "status": node.get("status"), - "episodes": node.get("episodes"), - "score": node.get("averageScore"), - "image": node.get("coverImage", {}).get("medium"), - "url": node.get("siteUrl"), - "relation_type": relation_type, - } - relations.setdefault(relation_type, []).append(entry) + rel = edge.get("relationType", "OTHER") + relations.setdefault(rel, []).append( + { + "id": node.get("id"), + "mal_id": node.get("idMal"), + "title": ( + node.get("title", {}).get("english") + or node.get("title", {}).get("romaji") + ), + "format": node.get("format"), + "status": node.get("status"), + "episodes": node.get("episodes"), + "score": node.get("averageScore"), + "image": node.get("coverImage", {}).get("medium"), + "url": node.get("siteUrl"), + "relation_type": rel, + } + ) - # ---------- Recommendations ---------- recommendations = [] for node in media.get("recommendations", {}).get("nodes", []): rec = node.get("mediaRecommendation") @@ -364,11 +343,7 @@ class AnimePahe: return { "id": media.get("id"), "mal_id": media.get("idMal"), - "title": { - "romaji": media["title"].get("romaji"), - "english": media["title"].get("english"), - "native": media["title"].get("native"), - }, + "title": media["title"], "synonyms": media.get("synonyms", []), "synopsis": synopsis, "format": media.get("format"), @@ -401,27 +376,19 @@ class AnimePahe: "recommendations": recommendations, } - # ---------------- SEARCH ---------------- - async def search(self, q: str): data = await self._fetch_json(f"{BASE_URL}/api?m=search&q={q}") return data.get("data", []) if data else [] - # ---------------- LATEST ---------------- - async def get_latest(self, p: int = 1): return await self._fetch_json(f"{BASE_URL}/api?m=airing&page={p}") - # ---------------- EPISODES ---------------- - async def get_episodes(self, anime_id: str, p: int = 1, resolve: bool = False): data = await self._fetch_json( f"{BASE_URL}/api?m=release&id={anime_id}&sort=episode_desc&page={p}" ) - if not data or not resolve: return data - episodes = data.get("data", []) async def enrich(ep): @@ -436,8 +403,6 @@ class AnimePahe: data["data"] = list(await asyncio.gather(*[enrich(ep) for ep in episodes])) return data - # ---------------- IDS ONLY ---------------- - async def get_ids(self, session: str): try: ids = await self._scrape_ids(session) @@ -450,30 +415,19 @@ class AnimePahe: "animePlanet": ids.get("animePlanet"), } except Exception as e: - print(f"[get_ids] ERROR: {e}") return {"error": f"Failed: {str(e)}"} - # ---------------- INFO ---------------- - async def get_info(self, session: str): try: ids = await self._scrape_ids(session) - anilist_id = ids.get("anilist") if not anilist_id: - return { - "error": "Could not find AniList ID on AnimePahe page", - "ids": ids, - } - + return {"error": "Could not find AniList ID", "ids": ids} if anilist_id in _info_cache: return _info_cache[anilist_id] - data = await self._fetch_anilist(anilist_id) - if "error" in data: return {"error": data["error"], "ids": ids} - data["ids"] = { "animepahe": ids.get("animepahe"), "anilist": anilist_id, @@ -482,30 +436,35 @@ class AnimePahe: "ann": ids.get("ann"), "animePlanet": ids.get("animePlanet"), } - _info_cache[anilist_id] = data return data - except Exception as e: - print(f"[get_info] ERROR: {e}") return {"error": f"Failed: {str(e)}"} - # ---------------- RESOLVE ---------------- - async def resolve(self, anime_session: str, episode_session: str): play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}" page = await self.context.new_page() try: - await page.goto(play_url, wait_until="domcontentloaded") + # 1. Apply stealth to bypass Cloudflare + await Stealth().apply_stealth_async(page) + + try: + await page.goto(play_url, wait_until="commit", timeout=45000) + except Exception as e: + if "Timeout" not in str(e): + raise e + + # 2. Increase timeout to 30 seconds to give Cloudflare time to auto-resolve await page.wait_for_selector( - "#resolutionMenu button", - state="attached", - timeout=15000, + "#resolutionMenu button", state="attached", timeout=30000 ) anime_name, episode_num = await self._scrape_play_meta(page) res_data = await self._collect_buttons(page) + + # Don't close the page quite yet, pass it to _embed_to_m3u8 if needed + # Wait, actually we can close it here since _embed_to_m3u8 creates its own page. await page.close() page = None @@ -544,33 +503,33 @@ class AnimePahe: tasks.append(resolve_one(best_sub)) if best_dub: tasks.append(resolve_one(best_dub)) - results = await asyncio.gather(*tasks) - sub_result = results[0] if best_sub else None - dub_result = ( - results[1] - if best_sub and best_dub - else (results[0] if best_dub else None) - ) - return { "anime": anime_session, "episode": episode_session, "anime_name": anime_name, "episode_num": episode_num, - "sub": sub_result, - "dub": dub_result, + "sub": results[0] if best_sub else None, + "dub": results[1] + if best_sub and best_dub + else (results[0] if best_dub else None), } - except Exception as e: - return {"error": str(e)} + # 3. TAKE A SCREENSHOT ON FAILURE to see what blocked the bot + if page: + try: + await page.screenshot(path="debug_error.png", full_page=True) + except: + pass + return { + "error": str(e), + "hint": "Check debug_error.png to see what the browser got stuck on.", + } finally: if page: await page.close() - # ---------------- SEASONS ---------------- - async def get_seasons(self, anime_id: str) -> dict: url = f"{ANIWATCHTV_BASE}/{anime_id}" page = await self.context.new_page() @@ -588,51 +547,20 @@ class AnimePahe: seasons = await page.evaluate(f"""() => {{ const BASE = "{ANIWATCHTV_BASE}"; - const currentId = "{anime_id}"; const results =[]; const seen = new Set(); - - const block = ( - document.querySelector('.os-list') || - document.querySelector('.seasons-block') || - document.querySelector('[class*="os-list"]') || - document.querySelector('[class*="season-list"]') - ); - - const fallbackContainer = (() => {{ - for (const el of document.querySelectorAll('*')) {{ - if (/more\\s+seasons?/i.test(el.innerText?.trim() || '')) {{ - let p = el.parentElement; - for (let i = 0; i < 5; i++) {{ - if (!p) break; - if (p.querySelectorAll('a[href]').length > 0) return p; - p = p.parentElement; - }} - }} - }} - return null; - }})(); - - const container = block || fallbackContainer; + const container = document.querySelector('.os-list') || document.querySelector('.seasons-block') || document.querySelector('[class*="os-list"]'); if (!container) return results; for (const a of container.querySelectorAll('a[href]')) {{ const href = a.getAttribute('href') || ''; - const fullUrl = href.startsWith('http') ? href - : href.startsWith('/') ? BASE + href - : null; + const fullUrl = href.startsWith('http') ? href : BASE + href; if (!fullUrl) continue; - const slug = fullUrl.replace(/\\/$/, '').split('/').pop(); if (!slug || seen.has(slug)) continue; seen.add(slug); - - const numericMatch = slug.match(/-(\\d+)$/); - const numericId = numericMatch ? numericMatch[1] : null; - - const titleEl = a.querySelector('span, [class*="title"], [class*="name"]'); - const title = (titleEl?.innerText?.trim() || a.innerText?.trim() || slug); - + + const title = (a.querySelector('span, [class*="title"]')?.innerText?.trim() || a.innerText?.trim() || slug); const posterEl = a.querySelector('.season-poster') || a.closest('li, div')?.querySelector('.season-poster'); let poster = null; if (posterEl) {{ @@ -640,21 +568,12 @@ class AnimePahe: const bg2 = bg.split('url(').pop().split(')')[0].replace(/['"/]/g, '').trim(); if (bg2 && bg2.startsWith('http')) poster = bg2; }} - - results.push({{ title, id: slug, numericId, url: fullUrl, poster }}); + results.push({{ title, id: slug, url: fullUrl, poster }}); }} - return results; }}""") - - return { - "id": anime_id, - "total": len(seasons), - "seasons": seasons, - } - + return {"id": anime_id, "total": len(seasons), "seasons": seasons} except Exception as e: - print(f"[get_seasons] ERROR: {e}") return {"id": anime_id, "total": 0, "seasons": [], "error": str(e)} finally: await page.close() @@ -672,22 +591,21 @@ async def lifespan(app: FastAPI): app = FastAPI(lifespan=lifespan) +# 🔥 ENABLE CORS SO REACT CAN COMMUNICATE WITH THIS API 🔥 +app.add_middleware( + CORSMiddleware, + allow_origins=[ + "*" + ], # For dev, allows all origins. Change to your Vite URL in prod. + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + @app.get("/", response_class=JSONResponse) async def root(): - return { - "status": "ok", - "routes": [ - "/search?q=:title", - "/latest?p=:page", - "/info/:session", - "/ids/:session", - "/episodes/:session?p=:page&resolve=false|true", - "/resolve/:animeSession/:episodeSession", - "/seasons/:animeId", - "/poster?url=:cdnImageUrl", - ], - } + return {"status": "ok"} @app.get("/search") @@ -705,11 +623,6 @@ async def api_info(session: str): return await pahe.get_info(session) -@app.get("/ids/{session}") -async def api_ids(session: str): - return await pahe.get_ids(session) - - @app.get("/episodes/{session}") async def api_episodes(session: str, p: int = 1, resolve: bool = False): return await pahe.get_episodes(session, p, resolve) @@ -727,13 +640,11 @@ async def api_seasons(anime_id: str, request: Request): for season in data.get("seasons", []): if season.get("poster"): season["posterProxied"] = f"{base_url}/poster?url={season['poster']}" - else: - season["posterProxied"] = None return data @app.get("/poster") -async def api_poster(url: str = Query(..., description="CDN image URL to proxy")): +async def api_poster(url: str = Query(..., description="CDN image proxy")): try: async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client: resp = await client.get( @@ -744,15 +655,56 @@ async def api_poster(url: str = Query(..., description="CDN image URL to proxy") }, ) resp.raise_for_status() - content_type = resp.headers.get("content-type", "image/jpeg") - return Response(content=resp.content, media_type=content_type) + return Response( + content=resp.content, + media_type=resp.headers.get("content-type", "image/jpeg"), + ) except Exception as e: + return Response(content=f"Error: {e}", status_code=502) + + +# 🔥 NEW HLS PROXY TO BYPASS CORS & 403 🔥 +@app.get("/hls-proxy") +async def hls_proxy(url: str, request: Request): + headers = { + "Referer": "https://kwik.cx/", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122 Safari/537.36", + } + + # Helper function to stream chunks directly + async def stream_generator(): + async with httpx.AsyncClient(follow_redirects=True) as client: + async with client.stream("GET", url, headers=headers) as resp: + async for chunk in resp.aiter_bytes(): + yield chunk + + # If it is an M3U8 Playlist, we need to rewrite its internal links to ALSO use the proxy + if url.split("?")[0].endswith(".m3u8"): + async with httpx.AsyncClient(follow_redirects=True) as client: + resp = await client.get(url, headers=headers) + lines = resp.text.splitlines() + + base_proxy = f"{str(request.base_url).rstrip('/')}/hls-proxy?url=" + new_lines = [] + + for line in lines: + if line.startswith("#") or not line.strip(): + new_lines.append(line) + else: + # Merge relative paths (e.g. chunk1.ts) with the absolute url + absolute_url = urllib.parse.urljoin(url, line.strip()) + # Wrap it in our proxy path + proxy_url = f"{base_proxy}{urllib.parse.quote(absolute_url)}" + new_lines.append(proxy_url) + return Response( - content=f"Failed to fetch image: {e}", - status_code=502, - media_type="text/plain", + content="\n".join(new_lines), media_type="application/vnd.apple.mpegurl" ) + else: + # Stream the binary video chunk (.ts) + return StreamingResponse(stream_generator(), media_type="video/MP2T") + if __name__ == "__main__": import uvicorn