From ed26a0172b2e571dfbe354fc8cb70b8bf54a6de7 Mon Sep 17 00:00:00 2001
From: Aira Catapang
Date: Tue, 17 Mar 2026 04:59:55 +0000
Subject: [PATCH] Update main.py

Replace the Kitsu relations lookup with AniList `relations`, drop the heavy
characters/staff/tags payload from the AniList query, add a /ids endpoint,
fold _resolve_episode into resolve(), and name MP4 downloads after the
scraped anime title and episode number.

NOTE(review): this patch was recovered from a whitespace-mangled copy.
Line breaks and Python indentation were reconstructed from the diff
markers, and the garbled download URL (`?file=(unknown)`) was restored to
`?file={filename}` — the preceding added line computes `filename` and it
was otherwise unused. Hunk offsets/line counts may be slightly off; apply
with `git apply --recount` if a plain apply rejects.
---
 main.py | 387 ++++++++++++++++++++------------------------------------
 1 file changed, 139 insertions(+), 248 deletions(-)

diff --git a/main.py b/main.py
index 2f0a129..17be2b9 100644
--- a/main.py
+++ b/main.py
@@ -11,20 +11,11 @@ from playwright.async_api import async_playwright, BrowserContext
 BASE_URL = "https://animepahe.si"
 ANILIST_API = "https://graphql.anilist.co"
 JIKAN_API = "https://api.jikan.moe/v4"
-KITSU_API = "https://kitsu.io/api/edge"
 
 IS_HEADLESS = os.environ.get("HEADLESS", "true").lower() == "true"
 
 # In-memory caches
 _info_cache: dict = {}
 _mal_synopsis_cache: dict = {}
-_kitsu_relations_cache: dict = {}
-
-KITSU_HEADERS = {
-    "Accept": "application/vnd.api+json",
-    "Content-Type": "application/vnd.api+json",
-}
-
-DIRECT_RELATION_TYPES = {"sequel", "prequel", "parent", "full_story", "side_story"}
 
 
@@ -80,14 +71,47 @@ class AnimePahe:
         finally:
             await page.close()
 
-    def _generate_mp4(self, m3u8_url: Optional[str], anime_id: str, res: str):
+    def _generate_mp4(
+        self, m3u8_url: Optional[str], anime_name: str, episode: str, res: str
+    ) -> Optional[str]:
         if not m3u8_url:
             return None
         match = re.search(r"(https?://[^.]+)[^/]*/stream/(.*?)/[^/]+\.m3u8", m3u8_url)
         if match:
-            return f"{match.group(1)}.kwik.cx/mp4/{match.group(2)}?file=AnimePahe_{anime_id}_{res}p.mp4"
+            clean_name = re.sub(r"[^\w\s]", "", anime_name).strip().replace(" ", "_")
+            filename = f"{clean_name}_EP{episode}_{res}P.mp4"
+            return f"{match.group(1)}.kwik.cx/mp4/{match.group(2)}?file={filename}"
         return None
 
+    async def _scrape_play_meta(self, page) -> tuple:
+        meta = await page.evaluate("""() => {
+            const titleEl = document.querySelector('.theatre-info h2 a, .anime-title, h2 a[href*="/anime/"]')
+            const epEl = document.querySelector('.theatre-info h2, .episode-title, h2')
+
+            let title = titleEl ? titleEl.innerText.trim() : ''
+            let episode = ''
+
+            if (epEl) {
+                const m = epEl.innerText.match(/episode\\s*(\\d+)/i)
+                if (m) episode = m[1]
+            }
+
+            if (!title || !episode) {
+                const t = document.title || ''
+                const m = t.match(/^(.+?)\\s*[-\\u2013]\\s*Episode\\s*(\\d+)/i)
+                if (m) {
+                    if (!title) title = m[1].trim()
+                    if (!episode) episode = m[2].trim()
+                }
+            }
+
+            return { title, episode }
+        }""")
+
+        title = (meta.get("title") or "").strip() or "Unknown"
+        episode = (meta.get("episode") or "").strip() or "00"
+        return title, episode
+
     # ---------------- SCRAPE IDs ONLY ----------------
 
     async def _scrape_ids(self, session: str) -> dict:
@@ -95,11 +119,26 @@
         page = await self.context.new_page()
         try:
             await page.goto(
                 f"{BASE_URL}/anime/{session}",
-                wait_until="networkidle",
+                wait_until="domcontentloaded",
                 timeout=30000,
             )
-            await page.wait_for_selector(".anime-info", timeout=10000)
-            await asyncio.sleep(1)
+
+            # Wait for the anime info block to render
+            try:
+                await page.wait_for_selector(
+                    "div.anime-info, div.anime-summary, aside, main", timeout=10000
+                )
+            except:
+                pass
+
+            # Extra wait for JS-rendered content
+            await asyncio.sleep(2)
+
+            # Debug: log all hrefs found on page
+            all_links = await page.evaluate("""() => {
+                return Array.from(document.querySelectorAll('a[href]')).map(a => a.href)
+            }""")
+            print(f"[scrape_ids] All links found: {all_links}")
 
             ids = await page.evaluate("""() => {
                 let ids = {}
@@ -111,8 +150,6 @@
                         ids["anilist"] = url.split("/").filter(Boolean).pop()
                     if (url.includes("anidb.net"))
                         ids["anidb"] = url.split("/").filter(Boolean).pop()
-                    if (url.includes("kitsu.io/anime"))
-                        ids["kitsu"] = url.split("/").filter(Boolean).pop()
                     if (url.includes("animenewsnetwork.com")) {
                         const m = url.match(/id=(\\d+)/)
                         if (m) ids["ann"] = m[1]
@@ -123,6 +160,7 @@
                 return ids
             }""")
 
+            print(f"[scrape_ids] Extracted ids: {ids}")
             ids["animepahe"] = session
             return ids
 
@@ -152,84 +190,9 @@
             _mal_synopsis_cache[mal_id] = None
             return None
 
-    # ---------------- KITSU RELATIONS ----------------
-
-    async def _fetch_kitsu_relations(self, kitsu_id: str) -> list:
-        if kitsu_id in _kitsu_relations_cache:
-            return _kitsu_relations_cache[kitsu_id]
-
-        try:
-            async with httpx.AsyncClient(timeout=15) as client:
-                url = (
-                    f"{KITSU_API}/anime/{kitsu_id}/media-relationships"
-                    f"?include=destination"
-                    f"&fields[anime]=canonicalTitle,posterImage,episodeCount,status,subtype,startDate"
-                    f"&page[limit]=20"
-                )
-                resp = await client.get(url, headers=KITSU_HEADERS)
-                resp.raise_for_status()
-                data = resp.json()
-        except Exception as e:
-            print(f"[kitsu_relations] failed for kitsu_id={kitsu_id}: {e}")
-            _kitsu_relations_cache[kitsu_id] = []
-            return []
-
-        included = {}
-        for item in data.get("included", []):
-            included[item["id"]] = item
-
-        direct = []
-        indirect = []
-
-        for rel in data.get("data", []):
-            attrs = rel.get("attributes", {})
-            role = (attrs.get("role") or "").lower()
-            dest_data = (
-                rel.get("relationships", {}).get("destination", {}).get("data", {})
-            )
-            dest_type = dest_data.get("type", "")
-            dest_id = dest_data.get("id", "")
-
-            if dest_type != "anime":
-                continue
-
-            dest = included.get(dest_id, {})
-            dest_attrs = dest.get("attributes", {})
-            poster = dest_attrs.get("posterImage") or {}
-
-            entry = {
-                "kitsu_id": dest_id,
-                "title": dest_attrs.get("canonicalTitle"),
-                "format": dest_attrs.get("subtype"),
-                "status": dest_attrs.get("status"),
-                "episodes": dest_attrs.get("episodeCount"),
-                "start_date": dest_attrs.get("startDate"),
-                "image": (
-                    poster.get("small")
-                    or poster.get("medium")
-                    or poster.get("original")
-                ),
-                "url": f"https://kitsu.io/anime/{dest_id}",
-                "relation_type": role,
-            }
-
-            if role in DIRECT_RELATION_TYPES:
-                direct.append(entry)
-            else:
-                indirect.append(entry)
-
-        combined = direct + indirect
-        _kitsu_relations_cache[kitsu_id] = combined
-        return combined
-
     # ---------------- SHARED RESOLVE HELPERS ----------------
 
     async def _collect_buttons(self, page) -> list:
-        """
-        Read all #resolutionMenu buttons.
-        Returns list with embed URL, resolution (int), fansub, audio type.
-        data-audio="jpn" → sub, data-audio="eng" → dub
-        """
         buttons = await page.locator("#resolutionMenu button").all()
         res_data = []
         for btn in buttons:
@@ -306,12 +269,6 @@
                 favourites
                 trending
                 genres
-                tags {
-                    name
-                    category
-                    rank
-                    isMediaSpoiler
-                }
                 coverImage {
                     extraLarge
                     large
@@ -326,27 +283,18 @@
                 studios(isMain: true) {
                     nodes { name siteUrl }
                 }
-                staff(perPage: 10) {
+                relations {
                     edges {
-                        role
+                        relationType(version: 2)
                         node {
-                            name { full }
-                            image { medium }
-                            siteUrl
-                        }
-                    }
-                }
-                characters(perPage: 10, sort: [ROLE, RELEVANCE]) {
-                    edges {
-                        role
-                        node {
-                            name { full }
-                            image { medium }
-                            siteUrl
-                        }
-                        voiceActors(language: JAPANESE) {
-                            name { full }
-                            image { medium }
+                            id
+                            idMal
+                            title { romaji english }
+                            format
+                            status
+                            episodes
+                            averageScore
+                            coverImage { medium }
                             siteUrl
                         }
                     }
@@ -418,6 +366,31 @@
                 elif t.get("site") == "dailymotion":
                     trailer = f"https://www.dailymotion.com/video/{t['id']}"
 
+            # ---------- Relations from AniList ----------
+            relations: dict[str, list] = {}
+            for edge in media.get("relations", {}).get("edges", []):
+                node = edge.get("node", {})
+                if not node:
+                    continue
+                relation_type = edge.get("relationType", "OTHER")
+                entry = {
+                    "id": node.get("id"),
+                    "mal_id": node.get("idMal"),
+                    "title": (
+                        node.get("title", {}).get("english")
+                        or node.get("title", {}).get("romaji")
+                    ),
+                    "format": node.get("format"),
+                    "status": node.get("status"),
+                    "episodes": node.get("episodes"),
+                    "score": node.get("averageScore"),
+                    "image": node.get("coverImage", {}).get("medium"),
+                    "url": node.get("siteUrl"),
+                    "relation_type": relation_type,
+                }
+                relations.setdefault(relation_type, []).append(entry)
+
+            # ---------- Recommendations ----------
             recommendations = []
             for node in media.get("recommendations", {}).get("nodes", []):
                 rec = node.get("mediaRecommendation")
@@ -438,38 +411,6 @@
                     }
                 )
 
-            characters = []
-            for edge in media.get("characters", {}).get("edges", []):
-                node = edge.get("node", {})
-                vas = edge.get("voiceActors", [])
-                characters.append(
-                    {
-                        "name": node.get("name", {}).get("full"),
-                        "image": node.get("image", {}).get("medium"),
-                        "role": edge.get("role"),
-                        "url": node.get("siteUrl"),
-                        "voice_actor": {
-                            "name": vas[0]["name"]["full"],
-                            "image": vas[0].get("image", {}).get("medium"),
-                            "url": vas[0].get("siteUrl"),
-                        }
-                        if vas
-                        else None,
-                    }
-                )
-
-            staff = []
-            for edge in media.get("staff", {}).get("edges", []):
-                node = edge.get("node", {})
-                staff.append(
-                    {
-                        "name": node.get("name", {}).get("full"),
-                        "image": node.get("image", {}).get("medium"),
-                        "role": edge.get("role"),
-                        "url": node.get("siteUrl"),
-                    }
-                )
-
             return {
                 "id": media.get("id"),
                 "mal_id": media.get("idMal"),
@@ -497,15 +438,6 @@
                 "favourites": media.get("favourites"),
                 "trending": media.get("trending"),
                 "genres": media.get("genres", []),
-                "tags": [
-                    {
-                        "name": t["name"],
-                        "category": t["category"],
-                        "rank": t["rank"],
-                        "spoiler": t["isMediaSpoiler"],
-                    }
-                    for t in media.get("tags", [])
-                ],
                 "cover_image": media.get("coverImage", {}),
                 "banner_image": media.get("bannerImage"),
                 "trailer": trailer,
@@ -515,9 +447,7 @@
                     {"site": l["site"], "url": l["url"], "type": l["type"]}
                     for l in media.get("externalLinks", [])
                 ],
-                "characters": characters,
-                "staff": staff,
-                "relations": {},
+                "relations": relations,
                 "recommendations": recommendations,
             }
 
@@ -548,7 +478,7 @@
             ep_session = ep.get("session")
             if not ep_session:
                 return ep
-            stream = await self._resolve_episode(anime_id, ep_session)
+            stream = await self.resolve(anime_id, ep_session)
             ep["sub"] = stream.get("sub")
             ep["dub"] = stream.get("dub")
             return ep
@@ -556,6 +486,23 @@
         data["data"] = list(await asyncio.gather(*[enrich(ep) for ep in episodes]))
         return data
 
+    # ---------------- IDS ONLY ----------------
+
+    async def get_ids(self, session: str):
+        try:
+            ids = await self._scrape_ids(session)
+            return {
+                "animepahe": ids.get("animepahe"),
+                "anilist": ids.get("anilist"),
+                "mal": ids.get("mal"),
+                "anidb": ids.get("anidb"),
+                "ann": ids.get("ann"),
+                "animePlanet": ids.get("animePlanet"),
+            }
+        except Exception as e:
+            print(f"[get_ids] ERROR: {e}")
+            return {"error": f"Failed: {str(e)}"}
+
     # ---------------- INFO ----------------
 
     async def get_info(self, session: str):
@@ -572,28 +519,16 @@
             if anilist_id in _info_cache:
                 return _info_cache[anilist_id]
 
-            kitsu_id = ids.get("kitsu")
-
-            async def empty_relations():
-                return []
-
-            anilist_task = self._fetch_anilist(anilist_id)
-            kitsu_task = (
-                self._fetch_kitsu_relations(kitsu_id) if kitsu_id else empty_relations()
-            )
-
-            data, kitsu_relations = await asyncio.gather(anilist_task, kitsu_task)
+            data = await self._fetch_anilist(anilist_id)
 
             if "error" in data:
                 return {"error": data["error"], "ids": ids}
 
-            data["relations"] = {"Related": kitsu_relations} if kitsu_relations else {}
             data["ids"] = {
                 "animepahe": ids.get("animepahe"),
                 "anilist": anilist_id,
                 "mal": ids.get("mal"),
                 "anidb": ids.get("anidb"),
-                "kitsu": kitsu_id,
                 "ann": ids.get("ann"),
                 "animePlanet": ids.get("animePlanet"),
             }
@@ -605,9 +540,9 @@
             print(f"[get_info] ERROR: {e}")
             return {"error": f"Failed: {str(e)}"}
 
-    # ---------------- _resolve_episode (used by get_episodes) ----------------
+    # ---------------- RESOLVE ----------------
 
-    async def _resolve_episode(self, anime_session: str, episode_session: str) -> dict:
+    async def resolve(self, anime_session: str, episode_session: str):
         play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
         page = await self.context.new_page()
 
@@ -618,70 +553,18 @@
                 state="attached",
                 timeout=15000,
             )
+
+            anime_name, episode_num = await self._scrape_play_meta(page)
             res_data = await self._collect_buttons(page)
             await page.close()
             page = None
 
-            if not res_data:
-                return {"sub": None, "dub": None}
-
             subs = [r for r in res_data if r["audio"] == "sub"]
             dubs = [r for r in res_data if r["audio"] == "dub"]
 
             best_sub = max(subs, key=lambda x: x["res"]) if subs else None
             best_dub = max(dubs, key=lambda x: x["res"]) if dubs else None
 
-            result = {"sub": None, "dub": None}
-
-            async def resolve_one(item, key):
-                m3u8 = await self._embed_to_m3u8(item["embed"])
-                res_str = str(item["res"])
-                result[key] = {
-                    "url": m3u8,
-                    "download": self._generate_mp4(m3u8, anime_session, res_str),
-                    "resolution": res_str,
-                    "fansub": item["fansub"],
-                }
-
-            tasks = []
-            if best_sub:
-                tasks.append(resolve_one(best_sub, "sub"))
-            if best_dub:
-                tasks.append(resolve_one(best_dub, "dub"))
-
-            await asyncio.gather(*tasks)
-            return result
-
-        except Exception as e:
-            print(f"[_resolve_episode] ERROR: {e}")
-            return {"sub": None, "dub": None, "error": str(e)}
-        finally:
-            if page:
-                await page.close()
-
-    # ---------------- RESOLVE ----------------
-
-    async def resolve(self, anime_session: str, episode_session: str):
-        """
-        Resolve highest-res sub and dub for a single episode.
-        Returns:
-            sub: { resolution, fansub, audio, url, download }
-            dub: { resolution, fansub, audio, url, download } or null if no dub
-        """
-        play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
-        page = await self.context.new_page()
-
-        try:
-            await page.goto(play_url, wait_until="domcontentloaded")
-            await page.wait_for_selector(
-                "#resolutionMenu button",
-                state="attached",
-                timeout=15000,
-            )
-            res_data = await self._collect_buttons(page)
-            await page.close()
-            page = None
-
-            async def resolve_source(item):
+            async def resolve_one(item):
                 try:
                     m3u8 = await self._embed_to_m3u8(item["embed"])
                     res_str = str(item["res"])
@@ -691,7 +574,9 @@
                         "audio": item["audio"],
                         "audio_lang": item["audio_lang"],
                         "url": m3u8,
-                        "download": self._generate_mp4(m3u8, anime_session, res_str),
+                        "download": self._generate_mp4(
+                            m3u8, anime_name, episode_num, res_str
+                        ),
                     }
                 except Exception as e:
                     return {
@@ -704,27 +589,28 @@
                         "error": str(e),
                     }
 
-            all_sources = list(
-                await asyncio.gather(*[resolve_source(i) for i in res_data])
+            tasks = []
+            if best_sub:
+                tasks.append(resolve_one(best_sub))
+            if best_dub:
+                tasks.append(resolve_one(best_dub))
+
+            results = await asyncio.gather(*tasks)
+
+            sub_result = results[0] if best_sub else None
+            dub_result = (
+                results[1]
+                if best_sub and best_dub
+                else (results[0] if best_dub else None)
             )
 
-            sub_sources = [s for s in all_sources if s["audio"] == "sub"]
-            dub_sources = [s for s in all_sources if s["audio"] == "dub"]
-
-            def best(sources):
-                if not sources:
-                    return None
-                return max(
-                    [s for s in sources if s["url"]],
-                    key=lambda x: int(x["resolution"]) if x["resolution"] else 0,
-                    default=None,
-                )
-
             return {
                 "anime": anime_session,
                 "episode": episode_session,
-                "sub": best(sub_sources),
-                "dub": best(dub_sources),
+                "anime_name": anime_name,
+                "episode_num": episode_num,
+                "sub": sub_result,
+                "dub": dub_result,
             }
 
         except Exception as e:
@@ -762,6 +648,11 @@
     return await pahe.get_info(session)
 
 
+@app.get("/ids/{session}")
+async def api_ids(session: str):
+    return await pahe.get_ids(session)
+
+
 @app.get("/episodes/{session}")
 async def api_episodes(session: str, p: int = 1, resolve: bool = False):
     return await pahe.get_episodes(session, p, resolve)