From 07048d71c4b73e54be85758725e66e57b190ab2f Mon Sep 17 00:00:00 2001 From: Aira Catapang Date: Wed, 18 Mar 2026 09:53:57 +0000 Subject: [PATCH] Update main.py --- main.py | 202 ++++++++++++++++---------------------------------------- 1 file changed, 58 insertions(+), 144 deletions(-) diff --git a/main.py b/main.py index fc154aa..6930ac4 100644 --- a/main.py +++ b/main.py @@ -55,12 +55,13 @@ class AnimePahe: async def _intercept_assets(self, route): url = route.request.url.lower() - # Allow all requests from aniwatchtv so season posters can load - if "aniwatchtv.to" in url: + # Allow requests from aniwatchtv & kwik (video host) so players/posters load correctly + if "aniwatchtv.to" in url or "kwik" in url: await route.continue_() return + if any(ad in url for ad in self.ad_domains) or url.endswith( - (".png", ".jpg", ".jpeg", ".webp", ".woff") + (".png", ".jpg", ".jpeg", ".webp", ".woff", ".gif") ): await route.abort() else: @@ -82,33 +83,31 @@ class AnimePahe: ) -> Optional[str]: if not m3u8_url: return None - match = re.search(r"(https?://[^.]+)[^/]*/stream/(.*?)/[^/]+\.m3u8", m3u8_url) + # Example: https://na-02.kwik.cx/stream/abc123def/index.m3u8 + match = re.search(r"(https?://[^/]+)/stream/([^/]+)/", m3u8_url) if match: clean_name = re.sub(r"[^\w\s]", "", anime_name).strip().replace(" ", "_") + if not clean_name: + clean_name = "Anime" filename = f"{clean_name}_EP{episode}_{res}P.mp4" - return f"{match.group(1)}.kwik.cx/mp4/{match.group(2)}?file={filename}" + domain = match.group(1) # e.g. https://na-02.kwik.cx + token = match.group(2) # e.g. abc123def + return f"{domain}/mp4/{token}?file={filename}" return None async def _scrape_play_meta(self, page) -> tuple: meta = await page.evaluate("""() => { - const titleEl = document.querySelector('.theatre-info h2 a, .anime-title, h2 a[href*="/anime/"]') - const epEl = document.querySelector('.theatre-info h2, .episode-title, h2') + const titleEl = document.querySelector('.theatre-info h1 a, .theatre-info h2 a, .anime-title, h1, h2'); + let title = titleEl ? titleEl.innerText.trim() : ''; + let episode = ''; - let title = titleEl ? titleEl.innerText.trim() : '' - let episode = '' - - if (epEl) { - const m = epEl.innerText.match(/episode\\s*(\\d+)/i) - if (m) episode = m[1] - } - - if (!title || !episode) { - const t = document.title || '' - const m = t.match(/^(.+?)\\s*[-\\u2013]\\s*Episode\\s*(\\d+)/i) - if (m) { - if (!title) title = m[1].trim() - if (!episode) episode = m[2].trim() - } + const t = document.title || ''; + // Match exactly: "Anime Name - 01 - AnimePahe" OR "Anime Name - Episode 01 - AnimePahe" + const m = t.match(/^(.+?)\\s*[-\\u2013]\\s*(?:Episode\\s*)?(\\d+(?:\\.\\d+)?)/i); + + if (m) { + if (!title || title.length < 2) title = m[1].trim(); + if (!episode) episode = m[2].trim(); } return { title, episode } @@ -129,7 +128,6 @@ class AnimePahe: timeout=30000, ) - # Wait for the anime info block to render try: await page.wait_for_selector( "div.anime-info, div.anime-summary, aside, main", timeout=10000 @@ -137,15 +135,8 @@ class AnimePahe: except: pass - # Extra wait for JS-rendered content await asyncio.sleep(2) - # Debug: log all hrefs found on page - all_links = await page.evaluate("""() => { - return Array.from(document.querySelectorAll('a[href]')).map(a => a.href) - }""") - print(f"[scrape_ids] All links found: {all_links}") - ids = await page.evaluate("""() => { let ids = {} document.querySelectorAll("a[href]").forEach(a => { @@ -166,7 +157,6 @@ class AnimePahe: return ids }""") - print(f"[scrape_ids] Extracted ids: {ids}") ids["animepahe"] = session return ids @@ -230,17 +220,35 @@ class AnimePahe: p.on("request", capture) try: - await p.set_extra_http_headers({"Referer": BASE_URL}) - await p.goto(embed_url, wait_until="domcontentloaded") - await p.evaluate( - "document.querySelectorAll('button, video, [class*=play]').forEach(el => el.click())" - ) + # Set the exact referer Kwik expects to prevent token rejections + await p.set_extra_http_headers({"Referer": "https://animepahe.si/"}) + await p.goto(embed_url, wait_until="domcontentloaded", timeout=15000) + + # Click loop: Muting allows browsers to bypass autoplay restrictions safely + for _ in range(6): + if found.is_set(): + break + await p.evaluate("""() => { + document.querySelectorAll('video').forEach(v => { + v.muted = true; + const p = v.play(); + if (p !== undefined) p.catch(() => {}); + }); + document.querySelectorAll('button, .vjs-big-play-button').forEach(b => { + try { b.click() } catch(e) {} + }); + }""") + await asyncio.sleep(1.5) + try: await asyncio.wait_for(found.wait(), timeout=5.0) except asyncio.TimeoutError: pass + except Exception as e: + print(f"[_embed_to_m3u8] ERROR: {e}") finally: await p.close() + return m3u8 # ---------------- ANILIST ---------------- @@ -251,85 +259,21 @@ class AnimePahe: Media(id: $id, type: ANIME) { id idMal - title { - romaji - english - native - } + title { romaji english native } synonyms description(asHtml: false) - format - status - episodes - duration - source - countryOfOrigin - isAdult + format status episodes duration source countryOfOrigin isAdult startDate { year month day } endDate { year month day } - season - seasonYear - averageScore - meanScore - popularity - favourites - trending - genres - coverImage { - extraLarge - large - medium - color - } + season seasonYear averageScore meanScore popularity favourites trending genres + coverImage { extraLarge large medium color } bannerImage - trailer { - id - site - } - studios(isMain: true) { - nodes { name siteUrl } - } - relations { - edges { - relationType(version: 2) - node { - id - idMal - title { romaji english } - format - status - episodes - averageScore - coverImage { medium } - siteUrl - } - } - } - recommendations(perPage: 20, sort: RATING_DESC) { - nodes { - rating - mediaRecommendation { - id - idMal - title { romaji english } - format - status - episodes - averageScore - coverImage { medium } - siteUrl - } - } - } - externalLinks { - site - url - type - } - nextAiringEpisode { - airingAt - episode - } + trailer { id site } + studios(isMain: true) { nodes { name siteUrl } } + relations { edges { relationType(version: 2) node { id idMal title { romaji english } format status episodes averageScore coverImage { medium } siteUrl } } } + recommendations(perPage: 20, sort: RATING_DESC) { nodes { rating mediaRecommendation { id idMal title { romaji english } format status episodes averageScore coverImage { medium } siteUrl } } } + externalLinks { site url type } + nextAiringEpisode { airingAt episode } } } """ @@ -628,21 +572,13 @@ class AnimePahe: # ---------------- SEASONS ---------------- async def get_seasons(self, anime_id: str) -> dict: - """ - Scrape the 'More Seasons' section from aniwatchtv.to using the - existing Playwright browser context. - anime_id is the full slug, e.g. jujutsu-kaisen-the-culling-game-part-1-20401 - """ url = f"{ANIWATCHTV_BASE}/{anime_id}" page = await self.context.new_page() try: await page.goto(url, wait_until="domcontentloaded", timeout=30000) - - # Short wait for lazy-loaded images and JS rendering await asyncio.sleep(1) - # Wait for season block — try common selectors gracefully for selector in [".os-list", ".seasons-block", "[class*='season']", "main"]: try: await page.wait_for_selector(selector, timeout=5000) @@ -653,10 +589,9 @@ class AnimePahe: seasons = await page.evaluate(f"""() => {{ const BASE = "{ANIWATCHTV_BASE}"; const currentId = "{anime_id}"; - const results = []; + const results =[]; const seen = new Set(); - // Strategy 1: dedicated season list block (.os-list or similar) const block = ( document.querySelector('.os-list') || document.querySelector('.seasons-block') || @@ -664,7 +599,6 @@ class AnimePahe: document.querySelector('[class*="season-list"]') ); - // Strategy 2: find a heading that says "More Seasons" and walk up const fallbackContainer = (() => {{ for (const el of document.querySelectorAll('*')) {{ if (/more\\s+seasons?/i.test(el.innerText?.trim() || '')) {{ @@ -690,7 +624,6 @@ class AnimePahe: if (!fullUrl) continue; const slug = fullUrl.replace(/\\/$/, '').split('/').pop(); - // Include ALL slugs — current page included — dedupe only if (!slug || seen.has(slug)) continue; seen.add(slug); @@ -700,7 +633,6 @@ class AnimePahe: const titleEl = a.querySelector('span, [class*="title"], [class*="name"]'); const title = (titleEl?.innerText?.trim() || a.innerText?.trim() || slug); - // Poster is in a sibling/child div.season-poster as a CSS background-image const posterEl = a.querySelector('.season-poster') || a.closest('li, div')?.querySelector('.season-poster'); let poster = null; if (posterEl) {{ @@ -752,8 +684,8 @@ async def root(): "/ids/:session", "/episodes/:session?p=:page&resolve=false|true", "/resolve/:animeSession/:episodeSession", - "/seasons/:animeId - e.g. /seasons/jujutsu-kaisen-the-culling-game-part-1-20401", - "/poster?url=:cdnImageUrl - proxy hotlink-protected poster images", + "/seasons/:animeId", + "/poster?url=:cdnImageUrl", ], } @@ -790,17 +722,6 @@ async def api_resolve(anime: str, episode: str): @app.get("/seasons/{anime_id:path}") async def api_seasons(anime_id: str, request: Request): - """ - Scrape the More Seasons section from aniwatchtv.to. - - Example: - GET /seasons/jujutsu-kaisen-the-culling-game-part-1-20401 - - Returns: - id - the slug passed in - total - number of other seasons found - seasons[] - list of { title, id, numericId, url, poster } - """ data = await pahe.get_seasons(anime_id) base_url = str(request.base_url).rstrip("/") for season in data.get("seasons", []): @@ -813,20 +734,13 @@ async def api_seasons(anime_id: str, request: Request): @app.get("/poster") async def api_poster(url: str = Query(..., description="CDN image URL to proxy")): - """ - Proxy a hotlink-protected poster image with the correct Referer header. - Use this to display season/anime posters in the browser. - - Example: - GET /poster?url=https://cdn.noitatnemucod.net/thumbnail/100x200/100/abc123.jpg - """ try: async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client: resp = await client.get( url, headers={ "Referer": "https://aniwatchtv.to/", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122 Safari/537.36", + "User-Agent": "Mozilla/5.0", }, ) resp.raise_for_status()