From ed26a0172b2e571dfbe354fc8cb70b8bf54a6de7 Mon Sep 17 00:00:00 2001
From: Aira Catapang
Date: Tue, 17 Mar 2026 04:59:55 +0000
Subject: [PATCH] Update main.py

Replace the Kitsu relations lookup with AniList `relations`, drop the heavy
characters/staff/tags payload from the AniList query, add a /ids endpoint,
fold _resolve_episode into resolve(), and name MP4 downloads after the
scraped anime title and episode number.

NOTE(review): this patch was recovered from a whitespace-mangled copy.
Line breaks and Python indentation were reconstructed from the diff
markers, and the garbled download URL (`?file=(unknown)`) was restored to
`?file={filename}` — the preceding added line computes `filename` and it
was otherwise unused. Hunk offsets/line counts may be slightly off; apply
with `git apply --recount` if a plain apply rejects.
---
 main.py | 387 ++++++++++++++++++++------------------------------------
 1 file changed, 139 insertions(+), 248 deletions(-)

diff --git a/main.py b/main.py
index 2f0a129..17be2b9 100644
--- a/main.py
+++ b/main.py
@@ -11,20 +11,11 @@ from playwright.async_api import async_playwright, BrowserContext
 BASE_URL = "https://animepahe.si"
 ANILIST_API = "https://graphql.anilist.co"
 JIKAN_API = "https://api.jikan.moe/v4"
-KITSU_API = "https://kitsu.io/api/edge"
 
 IS_HEADLESS = os.environ.get("HEADLESS", "true").lower() == "true"
 
 # In-memory caches
 _info_cache: dict = {}
 _mal_synopsis_cache: dict = {}
-_kitsu_relations_cache: dict = {}
-
-KITSU_HEADERS = {
-    "Accept": "application/vnd.api+json",
-    "Content-Type": "application/vnd.api+json",
-}
-
-DIRECT_RELATION_TYPES = {"sequel", "prequel", "parent", "full_story", "side_story"}
 
 
@@ -80,14 +71,47 @@ class AnimePahe:
         finally:
             await page.close()
 
-    def _generate_mp4(self, m3u8_url: Optional[str], anime_id: str, res: str):
+    def _generate_mp4(
+        self, m3u8_url: Optional[str], anime_name: str, episode: str, res: str
+    ) -> Optional[str]:
         if not m3u8_url:
             return None
         match = re.search(r"(https?://[^.]+)[^/]*/stream/(.*?)/[^/]+\.m3u8", m3u8_url)
         if match:
-            return f"{match.group(1)}.kwik.cx/mp4/{match.group(2)}?file=AnimePahe_{anime_id}_{res}p.mp4"
+            clean_name = re.sub(r"[^\w\s]", "", anime_name).strip().replace(" ", "_")
+            filename = f"{clean_name}_EP{episode}_{res}P.mp4"
+            return f"{match.group(1)}.kwik.cx/mp4/{match.group(2)}?file={filename}"
         return None
 
+    async def _scrape_play_meta(self, page) -> tuple:
+        meta = await page.evaluate("""() => {
+            const titleEl = document.querySelector('.theatre-info h2 a, .anime-title, h2 a[href*="/anime/"]')
+            const epEl = document.querySelector('.theatre-info h2, .episode-title, h2')
+
+            let title = titleEl ? titleEl.innerText.trim() : ''
+            let episode = ''
+
+            if (epEl) {
+                const m = epEl.innerText.match(/episode\\s*(\\d+)/i)
+                if (m) episode = m[1]
+            }
+
+            if (!title || !episode) {
+                const t = document.title || ''
+                const m = t.match(/^(.+?)\\s*[-\\u2013]\\s*Episode\\s*(\\d+)/i)
+                if (m) {
+                    if (!title) title = m[1].trim()
+                    if (!episode) episode = m[2].trim()
+                }
+            }
+
+            return { title, episode }
+        }""")
+
+        title = (meta.get("title") or "").strip() or "Unknown"
+        episode = (meta.get("episode") or "").strip() or "00"
+        return title, episode
+
     # ---------------- SCRAPE IDs ONLY ----------------
 
     async def _scrape_ids(self, session: str) -> dict:
@@ -95,11 +119,26 @@
         page = await self.context.new_page()
         try:
             await page.goto(
                 f"{BASE_URL}/anime/{session}",
-                wait_until="networkidle",
+                wait_until="domcontentloaded",
                 timeout=30000,
             )
-            await page.wait_for_selector(".anime-info", timeout=10000)
-            await asyncio.sleep(1)
+
+            # Wait for the anime info block to render
+            try:
+                await page.wait_for_selector(
+                    "div.anime-info, div.anime-summary, aside, main", timeout=10000
+                )
+            except:
+                pass
+
+            # Extra wait for JS-rendered content
+            await asyncio.sleep(2)
+
+            # Debug: log all hrefs found on page
+            all_links = await page.evaluate("""() => {
+                return Array.from(document.querySelectorAll('a[href]')).map(a => a.href)
+            }""")
+            print(f"[scrape_ids] All links found: {all_links}")
 
             ids = await page.evaluate("""() => {
                 let ids = {}
@@ -111,8 +150,6 @@
                         ids["anilist"] = url.split("/").filter(Boolean).pop()
                     if (url.includes("anidb.net"))
                         ids["anidb"] = url.split("/").filter(Boolean).pop()
-                    if (url.includes("kitsu.io/anime"))
-                        ids["kitsu"] = url.split("/").filter(Boolean).pop()
                     if (url.includes("animenewsnetwork.com")) {
                         const m = url.match(/id=(\\d+)/)
                         if (m) ids["ann"] = m[1]
@@ -123,6 +160,7 @@
                 return ids
             }""")
 
+            print(f"[scrape_ids] Extracted ids: {ids}")
             ids["animepahe"] = session
             return ids
 
@@ -152,84 +190,9 @@
             _mal_synopsis_cache[mal_id] = None
             return None
 
-    # ---------------- KITSU RELATIONS ----------------
-
-    async def _fetch_kitsu_relations(self, kitsu_id: str) -> list:
-        if kitsu_id in _kitsu_relations_cache:
-            return _kitsu_relations_cache[kitsu_id]
-
-        try:
-            async with httpx.AsyncClient(timeout=15) as client:
-                url = (
-                    f"{KITSU_API}/anime/{kitsu_id}/media-relationships"
-                    f"?include=destination"
-                    f"&fields[anime]=canonicalTitle,posterImage,episodeCount,status,subtype,startDate"
-                    f"&page[limit]=20"
-                )
-                resp = await client.get(url, headers=KITSU_HEADERS)
-                resp.raise_for_status()
-                data = resp.json()
-        except Exception as e:
-            print(f"[kitsu_relations] failed for kitsu_id={kitsu_id}: {e}")
-            _kitsu_relations_cache[kitsu_id] = []
-            return []
-
-        included = {}
-        for item in data.get("included", []):
-            included[item["id"]] = item
-
-        direct = []
-        indirect = []
-
-        for rel in data.get("data", []):
-            attrs = rel.get("attributes", {})
-            role = (attrs.get("role") or "").lower()
-            dest_data = (
-                rel.get("relationships", {}).get("destination", {}).get("data", {})
-            )
-            dest_type = dest_data.get("type", "")
-            dest_id = dest_data.get("id", "")
-
-            if dest_type != "anime":
-                continue
-
-            dest = included.get(dest_id, {})
-            dest_attrs = dest.get("attributes", {})
-            poster = dest_attrs.get("posterImage") or {}
-
-            entry = {
-                "kitsu_id": dest_id,
-                "title": dest_attrs.get("canonicalTitle"),
-                "format": dest_attrs.get("subtype"),
-                "status": dest_attrs.get("status"),
-                "episodes": dest_attrs.get("episodeCount"),
-                "start_date": dest_attrs.get("startDate"),
-                "image": (
-                    poster.get("small")
-                    or poster.get("medium")
-                    or poster.get("original")
-                ),
-                "url": f"https://kitsu.io/anime/{dest_id}",
-                "relation_type": role,
-            }
-
-            if role in DIRECT_RELATION_TYPES:
-                direct.append(entry)
-            else:
-                indirect.append(entry)
-
-        combined = direct + indirect
-        _kitsu_relations_cache[kitsu_id] = combined
-        return combined
-
     # ---------------- SHARED RESOLVE HELPERS ----------------
 
     async def _collect_buttons(self, page) -> list:
-        """
-        Read all #resolutionMenu buttons.
-        Returns list with embed URL, resolution (int), fansub, audio type.
-        data-audio="jpn" → sub, data-audio="eng" → dub
-        """
         buttons = await page.locator("#resolutionMenu button").all()
         res_data = []
         for btn in buttons:
@@ -306,12 +269,6 @@
                 favourites
                 trending
                 genres
-                tags {
-                    name
-                    category
-                    rank
-                    isMediaSpoiler
-                }
                 coverImage {
                     extraLarge
                     large
@@ -326,27 +283,18 @@
                 studios(isMain: true) {
                     nodes { name siteUrl }
                 }
-                staff(perPage: 10) {
+                relations {
                     edges {
-                        role
+                        relationType(version: 2)
                         node {
-                            name { full }
-                            image { medium }
-                            siteUrl
-                        }
-                    }
-                }
-                characters(perPage: 10, sort: [ROLE, RELEVANCE]) {
-                    edges {
-                        role
-                        node {
-                            name { full }
-                            image { medium }
-                            siteUrl
-                        }
-                        voiceActors(language: JAPANESE) {
-                            name { full }
-                            image { medium }
+                            id
+                            idMal
+                            title { romaji english }
+                            format
+                            status
+                            episodes
+                            averageScore
+                            coverImage { medium }
                             siteUrl
                         }
                     }
@@ -418,6 +366,31 @@
                 elif t.get("site") == "dailymotion":
                     trailer = f"https://www.dailymotion.com/video/{t['id']}"
 
+            # ---------- Relations from AniList ----------
+            relations: dict[str, list] = {}
+            for edge in media.get("relations", {}).get("edges", []):
+                node = edge.get("node", {})
+                if not node:
+                    continue
+                relation_type = edge.get("relationType", "OTHER")
+                entry = {
+                    "id": node.get("id"),
+                    "mal_id": node.get("idMal"),
+                    "title": (
+                        node.get("title", {}).get("english")
+                        or node.get("title", {}).get("romaji")
+                    ),
+                    "format": node.get("format"),
+                    "status": node.get("status"),
+                    "episodes": node.get("episodes"),
+                    "score": node.get("averageScore"),
+                    "image": node.get("coverImage", {}).get("medium"),
+                    "url": node.get("siteUrl"),
+                    "relation_type": relation_type,
+                }
+                relations.setdefault(relation_type, []).append(entry)
+
+            # ---------- Recommendations ----------
             recommendations = []
             for node in media.get("recommendations", {}).get("nodes", []):
                 rec = node.get("mediaRecommendation")
@@ -438,38 +411,6 @@
                     }
                 )
 
-            characters = []
-            for edge in media.get("characters", {}).get("edges", []):
-                node = edge.get("node", {})
-                vas = edge.get("voiceActors", [])
-                characters.append(
-                    {
-                        "name": node.get("name", {}).get("full"),
-                        "image": node.get("image", {}).get("medium"),
-                        "role": edge.get("role"),
-                        "url": node.get("siteUrl"),
-                        "voice_actor": {
-                            "name": vas[0]["name"]["full"],
-                            "image": vas[0].get("image", {}).get("medium"),
-                            "url": vas[0].get("siteUrl"),
-                        }
-                        if vas
-                        else None,
-                    }
-                )
-
-            staff = []
-            for edge in media.get("staff", {}).get("edges", []):
-                node = edge.get("node", {})
-                staff.append(
-                    {
-                        "name": node.get("name", {}).get("full"),
-                        "image": node.get("image", {}).get("medium"),
-                        "role": edge.get("role"),
-                        "url": node.get("siteUrl"),
-                    }
-                )
-
             return {
                 "id": media.get("id"),
                 "mal_id": media.get("idMal"),
@@ -497,15 +438,6 @@
                 "favourites": media.get("favourites"),
                 "trending": media.get("trending"),
                 "genres": media.get("genres", []),
-                "tags": [
-                    {
-                        "name": t["name"],
-                        "category": t["category"],
-                        "rank": t["rank"],
-                        "spoiler": t["isMediaSpoiler"],
-                    }
-                    for t in media.get("tags", [])
-                ],
                 "cover_image": media.get("coverImage", {}),
                 "banner_image": media.get("bannerImage"),
                 "trailer": trailer,
@@ -515,9 +447,7 @@
                     {"site": l["site"], "url": l["url"], "type": l["type"]}
                     for l in media.get("externalLinks", [])
                 ],
-                "characters": characters,
-                "staff": staff,
-                "relations": {},
+                "relations": relations,
                 "recommendations": recommendations,
             }
 
@@ -548,7 +478,7 @@
             ep_session = ep.get("session")
             if not ep_session:
                 return ep
-            stream = await self._resolve_episode(anime_id, ep_session)
+            stream = await self.resolve(anime_id, ep_session)
             ep["sub"] = stream.get("sub")
             ep["dub"] = stream.get("dub")
             return ep
@@ -556,6 +486,23 @@
         data["data"] = list(await asyncio.gather(*[enrich(ep) for ep in episodes]))
         return data
 
+    # ---------------- IDS ONLY ----------------
+
+    async def get_ids(self, session: str):
+        try:
+            ids = await self._scrape_ids(session)
+            return {
+                "animepahe": ids.get("animepahe"),
+                "anilist": ids.get("anilist"),
+                "mal": ids.get("mal"),
+                "anidb": ids.get("anidb"),
+                "ann": ids.get("ann"),
+                "animePlanet": ids.get("animePlanet"),
+            }
+        except Exception as e:
+            print(f"[get_ids] ERROR: {e}")
+            return {"error": f"Failed: {str(e)}"}
+
     # ---------------- INFO ----------------
 
     async def get_info(self, session: str):
@@ -572,28 +519,16 @@
             if anilist_id in _info_cache:
                 return _info_cache[anilist_id]
 
-            kitsu_id = ids.get("kitsu")
-
-            async def empty_relations():
-                return []
-
-            anilist_task = self._fetch_anilist(anilist_id)
-            kitsu_task = (
-                self._fetch_kitsu_relations(kitsu_id) if kitsu_id else empty_relations()
-            )
-
-            data, kitsu_relations = await asyncio.gather(anilist_task, kitsu_task)
+            data = await self._fetch_anilist(anilist_id)
 
             if "error" in data:
                 return {"error": data["error"], "ids": ids}
 
-            data["relations"] = {"Related": kitsu_relations} if kitsu_relations else {}
             data["ids"] = {
                 "animepahe": ids.get("animepahe"),
                 "anilist": anilist_id,
                 "mal": ids.get("mal"),
                 "anidb": ids.get("anidb"),
-                "kitsu": kitsu_id,
                 "ann": ids.get("ann"),
                 "animePlanet": ids.get("animePlanet"),
             }
@@ -605,9 +540,9 @@
             print(f"[get_info] ERROR: {e}")
             return {"error": f"Failed: {str(e)}"}
 
-    # ---------------- _resolve_episode (used by get_episodes) ----------------
+    # ---------------- RESOLVE ----------------
 
-    async def _resolve_episode(self, anime_session: str, episode_session: str) -> dict:
+    async def resolve(self, anime_session: str, episode_session: str):
         play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
         page = await self.context.new_page()
 
@@ -618,70 +553,18 @@
                 state="attached",
                 timeout=15000,
             )
+
+            anime_name, episode_num = await self._scrape_play_meta(page)
             res_data = await self._collect_buttons(page)
             await page.close()
             page = None
 
-            if not res_data:
-                return {"sub": None, "dub": None}
-
             subs = [r for r in res_data if r["audio"] == "sub"]
             dubs = [r for r in res_data if r["audio"] == "dub"]
 
             best_sub = max(subs, key=lambda x: x["res"]) if subs else None
             best_dub = max(dubs, key=lambda x: x["res"]) if dubs else None
 
-            result = {"sub": None, "dub": None}
-
-            async def resolve_one(item, key):
-                m3u8 = await self._embed_to_m3u8(item["embed"])
-                res_str = str(item["res"])
-                result[key] = {
-                    "url": m3u8,
-                    "download": self._generate_mp4(m3u8, anime_session, res_str),
-                    "resolution": res_str,
-                    "fansub": item["fansub"],
-                }
-
-            tasks = []
-            if best_sub:
-                tasks.append(resolve_one(best_sub, "sub"))
-            if best_dub:
-                tasks.append(resolve_one(best_dub, "dub"))
-
-            await asyncio.gather(*tasks)
-            return result
-
-        except Exception as e:
-            print(f"[_resolve_episode] ERROR: {e}")
-            return {"sub": None, "dub": None, "error": str(e)}
-        finally:
-            if page:
-                await page.close()
-
-    # ---------------- RESOLVE ----------------
-
-    async def resolve(self, anime_session: str, episode_session: str):
-        """
-        Resolve highest-res sub and dub for a single episode.
-        Returns:
-            sub: { resolution, fansub, audio, url, download }
-            dub: { resolution, fansub, audio, url, download } or null if no dub
-        """
-        play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
-        page = await self.context.new_page()
-
-        try:
-            await page.goto(play_url, wait_until="domcontentloaded")
-            await page.wait_for_selector(
-                "#resolutionMenu button",
-                state="attached",
-                timeout=15000,
-            )
-            res_data = await self._collect_buttons(page)
-            await page.close()
-            page = None
-
-            async def resolve_source(item):
+            async def resolve_one(item):
                 try:
                     m3u8 = await self._embed_to_m3u8(item["embed"])
                     res_str = str(item["res"])
@@ -691,7 +574,9 @@
                         "audio": item["audio"],
                         "audio_lang": item["audio_lang"],
                         "url": m3u8,
-                        "download": self._generate_mp4(m3u8, anime_session, res_str),
+                        "download": self._generate_mp4(
+                            m3u8, anime_name, episode_num, res_str
+                        ),
                     }
                 except Exception as e:
                     return {
@@ -704,27 +589,28 @@
                         "error": str(e),
                     }
 
-            all_sources = list(
-                await asyncio.gather(*[resolve_source(i) for i in res_data])
+            tasks = []
+            if best_sub:
+                tasks.append(resolve_one(best_sub))
+            if best_dub:
+                tasks.append(resolve_one(best_dub))
+
+            results = await asyncio.gather(*tasks)
+
+            sub_result = results[0] if best_sub else None
+            dub_result = (
+                results[1]
+                if best_sub and best_dub
+                else (results[0] if best_dub else None)
             )
 
-            sub_sources = [s for s in all_sources if s["audio"] == "sub"]
-            dub_sources = [s for s in all_sources if s["audio"] == "dub"]
-
-            def best(sources):
-                if not sources:
-                    return None
-                return max(
-                    [s for s in sources if s["url"]],
-                    key=lambda x: int(x["resolution"]) if x["resolution"] else 0,
-                    default=None,
-                )
-
             return {
                 "anime": anime_session,
                 "episode": episode_session,
-                "sub": best(sub_sources),
-                "dub": best(dub_sources),
+                "anime_name": anime_name,
+                "episode_num": episode_num,
+                "sub": sub_result,
+                "dub": dub_result,
             }
 
         except Exception as e:
@@ -762,6 +648,11 @@
     return await pahe.get_info(session)
 
 
+@app.get("/ids/{session}")
+async def api_ids(session: str):
+    return await pahe.get_ids(session)
+
+
 @app.get("/episodes/{session}")
 async def api_episodes(session: str, p: int = 1, resolve: bool = False):
     return await pahe.get_episodes(session, p, resolve)