mirror of
https://huggingface.co/spaces/Catapang1989/aniscrap
synced 2026-04-17 15:51:46 +00:00
Update main.py
This commit is contained in:
322
main.py
322
main.py
@@ -3,11 +3,14 @@ import asyncio
|
|||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import httpx
|
import httpx
|
||||||
|
import urllib.parse
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from fastapi import FastAPI, Query, Request
|
from fastapi import FastAPI, Query, Request
|
||||||
from fastapi.responses import StreamingResponse, Response, JSONResponse
|
from fastapi.responses import StreamingResponse, Response, JSONResponse
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from playwright.async_api import async_playwright, BrowserContext
|
from playwright.async_api import async_playwright, BrowserContext
|
||||||
|
from playwright_stealth import Stealth
|
||||||
|
|
||||||
BASE_URL = "https://animepahe.si"
|
BASE_URL = "https://animepahe.si"
|
||||||
ANIWATCHTV_BASE = "https://aniwatchtv.to"
|
ANIWATCHTV_BASE = "https://aniwatchtv.to"
|
||||||
@@ -55,7 +58,6 @@ class AnimePahe:
|
|||||||
|
|
||||||
async def _intercept_assets(self, route):
|
async def _intercept_assets(self, route):
|
||||||
url = route.request.url.lower()
|
url = route.request.url.lower()
|
||||||
# Allow requests from aniwatchtv & kwik (video host) so players/posters load correctly
|
|
||||||
if "aniwatchtv.to" in url or "kwik" in url:
|
if "aniwatchtv.to" in url or "kwik" in url:
|
||||||
await route.continue_()
|
await route.continue_()
|
||||||
return
|
return
|
||||||
@@ -83,16 +85,28 @@ class AnimePahe:
|
|||||||
) -> Optional[str]:
|
) -> Optional[str]:
|
||||||
if not m3u8_url:
|
if not m3u8_url:
|
||||||
return None
|
return None
|
||||||
# Example: https://na-02.kwik.cx/stream/abc123def/index.m3u8
|
|
||||||
match = re.search(r"(https?://[^/]+)/stream/([^/]+)/", m3u8_url)
|
# Regex explanation:
|
||||||
|
# https?://([^.]+) matches the subdomain (e.g., vault-99)
|
||||||
|
# [^/]*/stream/ ignores the rest of the domain (e.g., .owocdn.top) and matches /stream/
|
||||||
|
# (.*?)/[^/]+\.m3u8 matches the entire token path up to the final /uwu.m3u8
|
||||||
|
match = re.search(r"https?://([^.]+)[^/]*/stream/(.*?)/[^/]+\.m3u8", m3u8_url)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
|
subdomain = match.group(1) # e.g., "vault-99"
|
||||||
|
token_path = match.group(
|
||||||
|
2
|
||||||
|
) # e.g., "99/01/d138b9bb16e0a47024fad856caab2fba99d7cbd661ef2662a3572694eaebcf9a"
|
||||||
|
|
||||||
clean_name = re.sub(r"[^\w\s]", "", anime_name).strip().replace(" ", "_")
|
clean_name = re.sub(r"[^\w\s]", "", anime_name).strip().replace(" ", "_")
|
||||||
if not clean_name:
|
if not clean_name:
|
||||||
clean_name = "Anime"
|
clean_name = "Anime"
|
||||||
|
|
||||||
filename = f"{clean_name}_EP{episode}_{res}P.mp4"
|
filename = f"{clean_name}_EP{episode}_{res}P.mp4"
|
||||||
domain = match.group(1) # e.g. https://na-02.kwik.cx
|
|
||||||
token = match.group(2) # e.g. abc123def
|
# Reconstruct the string using the kwik.cx domain and /mp4/ endpoint
|
||||||
return f"{domain}/mp4/{token}?file={filename}"
|
return f"https://{subdomain}.kwik.cx/mp4/{token_path}?file={filename}"
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def _scrape_play_meta(self, page) -> tuple:
|
async def _scrape_play_meta(self, page) -> tuple:
|
||||||
@@ -102,7 +116,6 @@ class AnimePahe:
|
|||||||
let episode = '';
|
let episode = '';
|
||||||
|
|
||||||
const t = document.title || '';
|
const t = document.title || '';
|
||||||
// Match exactly: "Anime Name - 01 - AnimePahe" OR "Anime Name - Episode 01 - AnimePahe"
|
|
||||||
const m = t.match(/^(.+?)\\s*[-\\u2013]\\s*(?:Episode\\s*)?(\\d+(?:\\.\\d+)?)/i);
|
const m = t.match(/^(.+?)\\s*[-\\u2013]\\s*(?:Episode\\s*)?(\\d+(?:\\.\\d+)?)/i);
|
||||||
|
|
||||||
if (m) {
|
if (m) {
|
||||||
@@ -117,8 +130,6 @@ class AnimePahe:
|
|||||||
episode = (meta.get("episode") or "").strip() or "00"
|
episode = (meta.get("episode") or "").strip() or "00"
|
||||||
return title, episode
|
return title, episode
|
||||||
|
|
||||||
# ---------------- SCRAPE IDs ONLY ----------------
|
|
||||||
|
|
||||||
async def _scrape_ids(self, session: str) -> dict:
|
async def _scrape_ids(self, session: str) -> dict:
|
||||||
page = await self.context.new_page()
|
page = await self.context.new_page()
|
||||||
try:
|
try:
|
||||||
@@ -127,47 +138,35 @@ class AnimePahe:
|
|||||||
wait_until="domcontentloaded",
|
wait_until="domcontentloaded",
|
||||||
timeout=30000,
|
timeout=30000,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await page.wait_for_selector(
|
await page.wait_for_selector(
|
||||||
"div.anime-info, div.anime-summary, aside, main", timeout=10000
|
"div.anime-info, div.anime-summary, aside, main", timeout=10000
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
ids = await page.evaluate("""() => {
|
ids = await page.evaluate("""() => {
|
||||||
let ids = {}
|
let ids = {}
|
||||||
document.querySelectorAll("a[href]").forEach(a => {
|
document.querySelectorAll("a[href]").forEach(a => {
|
||||||
const url = a.href || ""
|
const url = a.href || ""
|
||||||
if (url.includes("myanimelist.net/anime"))
|
if (url.includes("myanimelist.net/anime")) ids["mal"] = url.split("/").filter(Boolean).pop()
|
||||||
ids["mal"] = url.split("/").filter(Boolean).pop()
|
if (url.includes("anilist.co/anime")) ids["anilist"] = url.split("/").filter(Boolean).pop()
|
||||||
if (url.includes("anilist.co/anime"))
|
if (url.includes("anidb.net")) ids["anidb"] = url.split("/").filter(Boolean).pop()
|
||||||
ids["anilist"] = url.split("/").filter(Boolean).pop()
|
|
||||||
if (url.includes("anidb.net"))
|
|
||||||
ids["anidb"] = url.split("/").filter(Boolean).pop()
|
|
||||||
if (url.includes("animenewsnetwork.com")) {
|
if (url.includes("animenewsnetwork.com")) {
|
||||||
const m = url.match(/id=(\\d+)/)
|
const m = url.match(/id=(\\d+)/)
|
||||||
if (m) ids["ann"] = m[1]
|
if (m) ids["ann"] = m[1]
|
||||||
}
|
}
|
||||||
if (url.includes("anime-planet.com/anime"))
|
if (url.includes("anime-planet.com/anime")) ids["animePlanet"] = url.split("/").filter(Boolean).pop()
|
||||||
ids["animePlanet"] = url.split("/").filter(Boolean).pop()
|
|
||||||
})
|
})
|
||||||
return ids
|
return ids
|
||||||
}""")
|
}""")
|
||||||
|
|
||||||
ids["animepahe"] = session
|
ids["animepahe"] = session
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[scrape_ids] ERROR: {e}")
|
|
||||||
return {"animepahe": session}
|
return {"animepahe": session}
|
||||||
finally:
|
finally:
|
||||||
await page.close()
|
await page.close()
|
||||||
|
|
||||||
# ---------------- MAL SYNOPSIS ----------------
|
|
||||||
|
|
||||||
async def _fetch_mal_synopsis(self, mal_id: str) -> Optional[str]:
|
async def _fetch_mal_synopsis(self, mal_id: str) -> Optional[str]:
|
||||||
if mal_id in _mal_synopsis_cache:
|
if mal_id in _mal_synopsis_cache:
|
||||||
return _mal_synopsis_cache[mal_id]
|
return _mal_synopsis_cache[mal_id]
|
||||||
@@ -181,13 +180,10 @@ class AnimePahe:
|
|||||||
synopsis = resp.json().get("data", {}).get("synopsis")
|
synopsis = resp.json().get("data", {}).get("synopsis")
|
||||||
_mal_synopsis_cache[mal_id] = synopsis
|
_mal_synopsis_cache[mal_id] = synopsis
|
||||||
return synopsis
|
return synopsis
|
||||||
except Exception as e:
|
except Exception:
|
||||||
print(f"[mal_synopsis] failed for mal_id={mal_id}: {e}")
|
|
||||||
_mal_synopsis_cache[mal_id] = None
|
_mal_synopsis_cache[mal_id] = None
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# ---------------- SHARED RESOLVE HELPERS ----------------
|
|
||||||
|
|
||||||
async def _collect_buttons(self, page) -> list:
|
async def _collect_buttons(self, page) -> list:
|
||||||
buttons = await page.locator("#resolutionMenu button").all()
|
buttons = await page.locator("#resolutionMenu button").all()
|
||||||
res_data = []
|
res_data = []
|
||||||
@@ -220,11 +216,8 @@ class AnimePahe:
|
|||||||
|
|
||||||
p.on("request", capture)
|
p.on("request", capture)
|
||||||
try:
|
try:
|
||||||
# Set the exact referer Kwik expects to prevent token rejections
|
|
||||||
await p.set_extra_http_headers({"Referer": "https://animepahe.si/"})
|
await p.set_extra_http_headers({"Referer": "https://animepahe.si/"})
|
||||||
await p.goto(embed_url, wait_until="domcontentloaded", timeout=15000)
|
await p.goto(embed_url, wait_until="domcontentloaded", timeout=15000)
|
||||||
|
|
||||||
# Click loop: Muting allows browsers to bypass autoplay restrictions safely
|
|
||||||
for _ in range(6):
|
for _ in range(6):
|
||||||
if found.is_set():
|
if found.is_set():
|
||||||
break
|
break
|
||||||
@@ -239,45 +232,30 @@ class AnimePahe:
|
|||||||
});
|
});
|
||||||
}""")
|
}""")
|
||||||
await asyncio.sleep(1.5)
|
await asyncio.sleep(1.5)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await asyncio.wait_for(found.wait(), timeout=5.0)
|
await asyncio.wait_for(found.wait(), timeout=5.0)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception:
|
||||||
print(f"[_embed_to_m3u8] ERROR: {e}")
|
pass
|
||||||
finally:
|
finally:
|
||||||
await p.close()
|
await p.close()
|
||||||
|
|
||||||
return m3u8
|
return m3u8
|
||||||
|
|
||||||
# ---------------- ANILIST ----------------
|
|
||||||
|
|
||||||
async def _fetch_anilist(self, anilist_id: str) -> dict:
|
async def _fetch_anilist(self, anilist_id: str) -> dict:
|
||||||
query = """
|
query = """
|
||||||
query ($id: Int) {
|
query ($id: Int) {
|
||||||
Media(id: $id, type: ANIME) {
|
Media(id: $id, type: ANIME) {
|
||||||
id
|
id idMal title { romaji english native } synonyms description(asHtml: false) format status episodes duration source countryOfOrigin isAdult
|
||||||
idMal
|
startDate { year month day } endDate { year month day } season seasonYear averageScore meanScore popularity favourites trending genres
|
||||||
title { romaji english native }
|
coverImage { extraLarge large medium color } bannerImage trailer { id site } studios(isMain: true) { nodes { name siteUrl } }
|
||||||
synonyms
|
|
||||||
description(asHtml: false)
|
|
||||||
format status episodes duration source countryOfOrigin isAdult
|
|
||||||
startDate { year month day }
|
|
||||||
endDate { year month day }
|
|
||||||
season seasonYear averageScore meanScore popularity favourites trending genres
|
|
||||||
coverImage { extraLarge large medium color }
|
|
||||||
bannerImage
|
|
||||||
trailer { id site }
|
|
||||||
studios(isMain: true) { nodes { name siteUrl } }
|
|
||||||
relations { edges { relationType(version: 2) node { id idMal title { romaji english } format status episodes averageScore coverImage { medium } siteUrl } } }
|
relations { edges { relationType(version: 2) node { id idMal title { romaji english } format status episodes averageScore coverImage { medium } siteUrl } } }
|
||||||
recommendations(perPage: 20, sort: RATING_DESC) { nodes { rating mediaRecommendation { id idMal title { romaji english } format status episodes averageScore coverImage { medium } siteUrl } } }
|
recommendations(perPage: 20, sort: RATING_DESC) { nodes { rating mediaRecommendation { id idMal title { romaji english } format status episodes averageScore coverImage { medium } siteUrl } } }
|
||||||
externalLinks { site url type }
|
externalLinks { site url type } nextAiringEpisode { airingAt episode }
|
||||||
nextAiringEpisode { airingAt episode }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=15) as client:
|
async with httpx.AsyncClient(timeout=15) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
@@ -291,7 +269,6 @@ class AnimePahe:
|
|||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[anilist] failed for id={anilist_id}: {e}")
|
|
||||||
return {"error": f"AniList fetch failed: {str(e)}"}
|
return {"error": f"AniList fetch failed: {str(e)}"}
|
||||||
|
|
||||||
media = result.get("data", {}).get("Media")
|
media = result.get("data", {}).get("Media")
|
||||||
@@ -305,8 +282,11 @@ class AnimePahe:
|
|||||||
def fmt_date(d):
|
def fmt_date(d):
|
||||||
if not d or not d.get("year"):
|
if not d or not d.get("year"):
|
||||||
return None
|
return None
|
||||||
parts = [d.get("year"), d.get("month"), d.get("day")]
|
return "-".join(
|
||||||
return "-".join(str(p).zfill(2) for p in parts if p)
|
str(p).zfill(2)
|
||||||
|
for p in [d.get("year"), d.get("month"), d.get("day")]
|
||||||
|
if p
|
||||||
|
)
|
||||||
|
|
||||||
trailer = None
|
trailer = None
|
||||||
if media.get("trailer"):
|
if media.get("trailer"):
|
||||||
@@ -316,14 +296,14 @@ class AnimePahe:
|
|||||||
elif t.get("site") == "dailymotion":
|
elif t.get("site") == "dailymotion":
|
||||||
trailer = f"https://www.dailymotion.com/video/{t['id']}"
|
trailer = f"https://www.dailymotion.com/video/{t['id']}"
|
||||||
|
|
||||||
# ---------- Relations from AniList ----------
|
relations = {}
|
||||||
relations: dict[str, list] = {}
|
|
||||||
for edge in media.get("relations", {}).get("edges", []):
|
for edge in media.get("relations", {}).get("edges", []):
|
||||||
node = edge.get("node", {})
|
node = edge.get("node", {})
|
||||||
if not node:
|
if not node:
|
||||||
continue
|
continue
|
||||||
relation_type = edge.get("relationType", "OTHER")
|
rel = edge.get("relationType", "OTHER")
|
||||||
entry = {
|
relations.setdefault(rel, []).append(
|
||||||
|
{
|
||||||
"id": node.get("id"),
|
"id": node.get("id"),
|
||||||
"mal_id": node.get("idMal"),
|
"mal_id": node.get("idMal"),
|
||||||
"title": (
|
"title": (
|
||||||
@@ -336,11 +316,10 @@ class AnimePahe:
|
|||||||
"score": node.get("averageScore"),
|
"score": node.get("averageScore"),
|
||||||
"image": node.get("coverImage", {}).get("medium"),
|
"image": node.get("coverImage", {}).get("medium"),
|
||||||
"url": node.get("siteUrl"),
|
"url": node.get("siteUrl"),
|
||||||
"relation_type": relation_type,
|
"relation_type": rel,
|
||||||
}
|
}
|
||||||
relations.setdefault(relation_type, []).append(entry)
|
)
|
||||||
|
|
||||||
# ---------- Recommendations ----------
|
|
||||||
recommendations = []
|
recommendations = []
|
||||||
for node in media.get("recommendations", {}).get("nodes", []):
|
for node in media.get("recommendations", {}).get("nodes", []):
|
||||||
rec = node.get("mediaRecommendation")
|
rec = node.get("mediaRecommendation")
|
||||||
@@ -364,11 +343,7 @@ class AnimePahe:
|
|||||||
return {
|
return {
|
||||||
"id": media.get("id"),
|
"id": media.get("id"),
|
||||||
"mal_id": media.get("idMal"),
|
"mal_id": media.get("idMal"),
|
||||||
"title": {
|
"title": media["title"],
|
||||||
"romaji": media["title"].get("romaji"),
|
|
||||||
"english": media["title"].get("english"),
|
|
||||||
"native": media["title"].get("native"),
|
|
||||||
},
|
|
||||||
"synonyms": media.get("synonyms", []),
|
"synonyms": media.get("synonyms", []),
|
||||||
"synopsis": synopsis,
|
"synopsis": synopsis,
|
||||||
"format": media.get("format"),
|
"format": media.get("format"),
|
||||||
@@ -401,27 +376,19 @@ class AnimePahe:
|
|||||||
"recommendations": recommendations,
|
"recommendations": recommendations,
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------- SEARCH ----------------
|
|
||||||
|
|
||||||
async def search(self, q: str):
|
async def search(self, q: str):
|
||||||
data = await self._fetch_json(f"{BASE_URL}/api?m=search&q={q}")
|
data = await self._fetch_json(f"{BASE_URL}/api?m=search&q={q}")
|
||||||
return data.get("data", []) if data else []
|
return data.get("data", []) if data else []
|
||||||
|
|
||||||
# ---------------- LATEST ----------------
|
|
||||||
|
|
||||||
async def get_latest(self, p: int = 1):
|
async def get_latest(self, p: int = 1):
|
||||||
return await self._fetch_json(f"{BASE_URL}/api?m=airing&page={p}")
|
return await self._fetch_json(f"{BASE_URL}/api?m=airing&page={p}")
|
||||||
|
|
||||||
# ---------------- EPISODES ----------------
|
|
||||||
|
|
||||||
async def get_episodes(self, anime_id: str, p: int = 1, resolve: bool = False):
|
async def get_episodes(self, anime_id: str, p: int = 1, resolve: bool = False):
|
||||||
data = await self._fetch_json(
|
data = await self._fetch_json(
|
||||||
f"{BASE_URL}/api?m=release&id={anime_id}&sort=episode_desc&page={p}"
|
f"{BASE_URL}/api?m=release&id={anime_id}&sort=episode_desc&page={p}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if not data or not resolve:
|
if not data or not resolve:
|
||||||
return data
|
return data
|
||||||
|
|
||||||
episodes = data.get("data", [])
|
episodes = data.get("data", [])
|
||||||
|
|
||||||
async def enrich(ep):
|
async def enrich(ep):
|
||||||
@@ -436,8 +403,6 @@ class AnimePahe:
|
|||||||
data["data"] = list(await asyncio.gather(*[enrich(ep) for ep in episodes]))
|
data["data"] = list(await asyncio.gather(*[enrich(ep) for ep in episodes]))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
# ---------------- IDS ONLY ----------------
|
|
||||||
|
|
||||||
async def get_ids(self, session: str):
|
async def get_ids(self, session: str):
|
||||||
try:
|
try:
|
||||||
ids = await self._scrape_ids(session)
|
ids = await self._scrape_ids(session)
|
||||||
@@ -450,30 +415,19 @@ class AnimePahe:
|
|||||||
"animePlanet": ids.get("animePlanet"),
|
"animePlanet": ids.get("animePlanet"),
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[get_ids] ERROR: {e}")
|
|
||||||
return {"error": f"Failed: {str(e)}"}
|
return {"error": f"Failed: {str(e)}"}
|
||||||
|
|
||||||
# ---------------- INFO ----------------
|
|
||||||
|
|
||||||
async def get_info(self, session: str):
|
async def get_info(self, session: str):
|
||||||
try:
|
try:
|
||||||
ids = await self._scrape_ids(session)
|
ids = await self._scrape_ids(session)
|
||||||
|
|
||||||
anilist_id = ids.get("anilist")
|
anilist_id = ids.get("anilist")
|
||||||
if not anilist_id:
|
if not anilist_id:
|
||||||
return {
|
return {"error": "Could not find AniList ID", "ids": ids}
|
||||||
"error": "Could not find AniList ID on AnimePahe page",
|
|
||||||
"ids": ids,
|
|
||||||
}
|
|
||||||
|
|
||||||
if anilist_id in _info_cache:
|
if anilist_id in _info_cache:
|
||||||
return _info_cache[anilist_id]
|
return _info_cache[anilist_id]
|
||||||
|
|
||||||
data = await self._fetch_anilist(anilist_id)
|
data = await self._fetch_anilist(anilist_id)
|
||||||
|
|
||||||
if "error" in data:
|
if "error" in data:
|
||||||
return {"error": data["error"], "ids": ids}
|
return {"error": data["error"], "ids": ids}
|
||||||
|
|
||||||
data["ids"] = {
|
data["ids"] = {
|
||||||
"animepahe": ids.get("animepahe"),
|
"animepahe": ids.get("animepahe"),
|
||||||
"anilist": anilist_id,
|
"anilist": anilist_id,
|
||||||
@@ -482,30 +436,35 @@ class AnimePahe:
|
|||||||
"ann": ids.get("ann"),
|
"ann": ids.get("ann"),
|
||||||
"animePlanet": ids.get("animePlanet"),
|
"animePlanet": ids.get("animePlanet"),
|
||||||
}
|
}
|
||||||
|
|
||||||
_info_cache[anilist_id] = data
|
_info_cache[anilist_id] = data
|
||||||
return data
|
return data
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[get_info] ERROR: {e}")
|
|
||||||
return {"error": f"Failed: {str(e)}"}
|
return {"error": f"Failed: {str(e)}"}
|
||||||
|
|
||||||
# ---------------- RESOLVE ----------------
|
|
||||||
|
|
||||||
async def resolve(self, anime_session: str, episode_session: str):
|
async def resolve(self, anime_session: str, episode_session: str):
|
||||||
play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
|
play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
|
||||||
page = await self.context.new_page()
|
page = await self.context.new_page()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await page.goto(play_url, wait_until="domcontentloaded")
|
# 1. Apply stealth to bypass Cloudflare
|
||||||
|
await Stealth().apply_stealth_async(page)
|
||||||
|
|
||||||
|
try:
|
||||||
|
await page.goto(play_url, wait_until="commit", timeout=45000)
|
||||||
|
except Exception as e:
|
||||||
|
if "Timeout" not in str(e):
|
||||||
|
raise e
|
||||||
|
|
||||||
|
# 2. Increase timeout to 30 seconds to give Cloudflare time to auto-resolve
|
||||||
await page.wait_for_selector(
|
await page.wait_for_selector(
|
||||||
"#resolutionMenu button",
|
"#resolutionMenu button", state="attached", timeout=30000
|
||||||
state="attached",
|
|
||||||
timeout=15000,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
anime_name, episode_num = await self._scrape_play_meta(page)
|
anime_name, episode_num = await self._scrape_play_meta(page)
|
||||||
res_data = await self._collect_buttons(page)
|
res_data = await self._collect_buttons(page)
|
||||||
|
|
||||||
|
# Don't close the page quite yet, pass it to _embed_to_m3u8 if needed
|
||||||
|
# Wait, actually we can close it here since _embed_to_m3u8 creates its own page.
|
||||||
await page.close()
|
await page.close()
|
||||||
page = None
|
page = None
|
||||||
|
|
||||||
@@ -544,33 +503,33 @@ class AnimePahe:
|
|||||||
tasks.append(resolve_one(best_sub))
|
tasks.append(resolve_one(best_sub))
|
||||||
if best_dub:
|
if best_dub:
|
||||||
tasks.append(resolve_one(best_dub))
|
tasks.append(resolve_one(best_dub))
|
||||||
|
|
||||||
results = await asyncio.gather(*tasks)
|
results = await asyncio.gather(*tasks)
|
||||||
|
|
||||||
sub_result = results[0] if best_sub else None
|
|
||||||
dub_result = (
|
|
||||||
results[1]
|
|
||||||
if best_sub and best_dub
|
|
||||||
else (results[0] if best_dub else None)
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"anime": anime_session,
|
"anime": anime_session,
|
||||||
"episode": episode_session,
|
"episode": episode_session,
|
||||||
"anime_name": anime_name,
|
"anime_name": anime_name,
|
||||||
"episode_num": episode_num,
|
"episode_num": episode_num,
|
||||||
"sub": sub_result,
|
"sub": results[0] if best_sub else None,
|
||||||
"dub": dub_result,
|
"dub": results[1]
|
||||||
|
if best_sub and best_dub
|
||||||
|
else (results[0] if best_dub else None),
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"error": str(e)}
|
# 3. TAKE A SCREENSHOT ON FAILURE to see what blocked the bot
|
||||||
|
if page:
|
||||||
|
try:
|
||||||
|
await page.screenshot(path="debug_error.png", full_page=True)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return {
|
||||||
|
"error": str(e),
|
||||||
|
"hint": "Check debug_error.png to see what the browser got stuck on.",
|
||||||
|
}
|
||||||
finally:
|
finally:
|
||||||
if page:
|
if page:
|
||||||
await page.close()
|
await page.close()
|
||||||
|
|
||||||
# ---------------- SEASONS ----------------
|
|
||||||
|
|
||||||
async def get_seasons(self, anime_id: str) -> dict:
|
async def get_seasons(self, anime_id: str) -> dict:
|
||||||
url = f"{ANIWATCHTV_BASE}/{anime_id}"
|
url = f"{ANIWATCHTV_BASE}/{anime_id}"
|
||||||
page = await self.context.new_page()
|
page = await self.context.new_page()
|
||||||
@@ -588,51 +547,20 @@ class AnimePahe:
|
|||||||
|
|
||||||
seasons = await page.evaluate(f"""() => {{
|
seasons = await page.evaluate(f"""() => {{
|
||||||
const BASE = "{ANIWATCHTV_BASE}";
|
const BASE = "{ANIWATCHTV_BASE}";
|
||||||
const currentId = "{anime_id}";
|
|
||||||
const results =[];
|
const results =[];
|
||||||
const seen = new Set();
|
const seen = new Set();
|
||||||
|
const container = document.querySelector('.os-list') || document.querySelector('.seasons-block') || document.querySelector('[class*="os-list"]');
|
||||||
const block = (
|
|
||||||
document.querySelector('.os-list') ||
|
|
||||||
document.querySelector('.seasons-block') ||
|
|
||||||
document.querySelector('[class*="os-list"]') ||
|
|
||||||
document.querySelector('[class*="season-list"]')
|
|
||||||
);
|
|
||||||
|
|
||||||
const fallbackContainer = (() => {{
|
|
||||||
for (const el of document.querySelectorAll('*')) {{
|
|
||||||
if (/more\\s+seasons?/i.test(el.innerText?.trim() || '')) {{
|
|
||||||
let p = el.parentElement;
|
|
||||||
for (let i = 0; i < 5; i++) {{
|
|
||||||
if (!p) break;
|
|
||||||
if (p.querySelectorAll('a[href]').length > 0) return p;
|
|
||||||
p = p.parentElement;
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
return null;
|
|
||||||
}})();
|
|
||||||
|
|
||||||
const container = block || fallbackContainer;
|
|
||||||
if (!container) return results;
|
if (!container) return results;
|
||||||
|
|
||||||
for (const a of container.querySelectorAll('a[href]')) {{
|
for (const a of container.querySelectorAll('a[href]')) {{
|
||||||
const href = a.getAttribute('href') || '';
|
const href = a.getAttribute('href') || '';
|
||||||
const fullUrl = href.startsWith('http') ? href
|
const fullUrl = href.startsWith('http') ? href : BASE + href;
|
||||||
: href.startsWith('/') ? BASE + href
|
|
||||||
: null;
|
|
||||||
if (!fullUrl) continue;
|
if (!fullUrl) continue;
|
||||||
|
|
||||||
const slug = fullUrl.replace(/\\/$/, '').split('/').pop();
|
const slug = fullUrl.replace(/\\/$/, '').split('/').pop();
|
||||||
if (!slug || seen.has(slug)) continue;
|
if (!slug || seen.has(slug)) continue;
|
||||||
seen.add(slug);
|
seen.add(slug);
|
||||||
|
|
||||||
const numericMatch = slug.match(/-(\\d+)$/);
|
const title = (a.querySelector('span, [class*="title"]')?.innerText?.trim() || a.innerText?.trim() || slug);
|
||||||
const numericId = numericMatch ? numericMatch[1] : null;
|
|
||||||
|
|
||||||
const titleEl = a.querySelector('span, [class*="title"], [class*="name"]');
|
|
||||||
const title = (titleEl?.innerText?.trim() || a.innerText?.trim() || slug);
|
|
||||||
|
|
||||||
const posterEl = a.querySelector('.season-poster') || a.closest('li, div')?.querySelector('.season-poster');
|
const posterEl = a.querySelector('.season-poster') || a.closest('li, div')?.querySelector('.season-poster');
|
||||||
let poster = null;
|
let poster = null;
|
||||||
if (posterEl) {{
|
if (posterEl) {{
|
||||||
@@ -640,21 +568,12 @@ class AnimePahe:
|
|||||||
const bg2 = bg.split('url(').pop().split(')')[0].replace(/['"/]/g, '').trim();
|
const bg2 = bg.split('url(').pop().split(')')[0].replace(/['"/]/g, '').trim();
|
||||||
if (bg2 && bg2.startsWith('http')) poster = bg2;
|
if (bg2 && bg2.startsWith('http')) poster = bg2;
|
||||||
}}
|
}}
|
||||||
|
results.push({{ title, id: slug, url: fullUrl, poster }});
|
||||||
results.push({{ title, id: slug, numericId, url: fullUrl, poster }});
|
|
||||||
}}
|
}}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}}""")
|
}}""")
|
||||||
|
return {"id": anime_id, "total": len(seasons), "seasons": seasons}
|
||||||
return {
|
|
||||||
"id": anime_id,
|
|
||||||
"total": len(seasons),
|
|
||||||
"seasons": seasons,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[get_seasons] ERROR: {e}")
|
|
||||||
return {"id": anime_id, "total": 0, "seasons": [], "error": str(e)}
|
return {"id": anime_id, "total": 0, "seasons": [], "error": str(e)}
|
||||||
finally:
|
finally:
|
||||||
await page.close()
|
await page.close()
|
||||||
@@ -672,22 +591,21 @@ async def lifespan(app: FastAPI):
|
|||||||
|
|
||||||
app = FastAPI(lifespan=lifespan)
|
app = FastAPI(lifespan=lifespan)
|
||||||
|
|
||||||
|
# 🔥 ENABLE CORS SO REACT CAN COMMUNICATE WITH THIS API 🔥
|
||||||
|
app.add_middleware(
|
||||||
|
CORSMiddleware,
|
||||||
|
allow_origins=[
|
||||||
|
"*"
|
||||||
|
], # For dev, allows all origins. Change to your Vite URL in prod.
|
||||||
|
allow_credentials=True,
|
||||||
|
allow_methods=["*"],
|
||||||
|
allow_headers=["*"],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/", response_class=JSONResponse)
|
@app.get("/", response_class=JSONResponse)
|
||||||
async def root():
|
async def root():
|
||||||
return {
|
return {"status": "ok"}
|
||||||
"status": "ok",
|
|
||||||
"routes": [
|
|
||||||
"/search?q=:title",
|
|
||||||
"/latest?p=:page",
|
|
||||||
"/info/:session",
|
|
||||||
"/ids/:session",
|
|
||||||
"/episodes/:session?p=:page&resolve=false|true",
|
|
||||||
"/resolve/:animeSession/:episodeSession",
|
|
||||||
"/seasons/:animeId",
|
|
||||||
"/poster?url=:cdnImageUrl",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/search")
|
@app.get("/search")
|
||||||
@@ -705,11 +623,6 @@ async def api_info(session: str):
|
|||||||
return await pahe.get_info(session)
|
return await pahe.get_info(session)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/ids/{session}")
|
|
||||||
async def api_ids(session: str):
|
|
||||||
return await pahe.get_ids(session)
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/episodes/{session}")
|
@app.get("/episodes/{session}")
|
||||||
async def api_episodes(session: str, p: int = 1, resolve: bool = False):
|
async def api_episodes(session: str, p: int = 1, resolve: bool = False):
|
||||||
return await pahe.get_episodes(session, p, resolve)
|
return await pahe.get_episodes(session, p, resolve)
|
||||||
@@ -727,13 +640,11 @@ async def api_seasons(anime_id: str, request: Request):
|
|||||||
for season in data.get("seasons", []):
|
for season in data.get("seasons", []):
|
||||||
if season.get("poster"):
|
if season.get("poster"):
|
||||||
season["posterProxied"] = f"{base_url}/poster?url={season['poster']}"
|
season["posterProxied"] = f"{base_url}/poster?url={season['poster']}"
|
||||||
else:
|
|
||||||
season["posterProxied"] = None
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@app.get("/poster")
|
@app.get("/poster")
|
||||||
async def api_poster(url: str = Query(..., description="CDN image URL to proxy")):
|
async def api_poster(url: str = Query(..., description="CDN image proxy")):
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
|
async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
|
||||||
resp = await client.get(
|
resp = await client.get(
|
||||||
@@ -744,14 +655,55 @@ async def api_poster(url: str = Query(..., description="CDN image URL to proxy")
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
content_type = resp.headers.get("content-type", "image/jpeg")
|
|
||||||
return Response(content=resp.content, media_type=content_type)
|
|
||||||
except Exception as e:
|
|
||||||
return Response(
|
return Response(
|
||||||
content=f"Failed to fetch image: {e}",
|
content=resp.content,
|
||||||
status_code=502,
|
media_type=resp.headers.get("content-type", "image/jpeg"),
|
||||||
media_type="text/plain",
|
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
return Response(content=f"Error: {e}", status_code=502)
|
||||||
|
|
||||||
|
|
||||||
|
# 🔥 NEW HLS PROXY TO BYPASS CORS & 403 🔥
|
||||||
|
@app.get("/hls-proxy")
|
||||||
|
async def hls_proxy(url: str, request: Request):
|
||||||
|
headers = {
|
||||||
|
"Referer": "https://kwik.cx/",
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122 Safari/537.36",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Helper function to stream chunks directly
|
||||||
|
async def stream_generator():
|
||||||
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||||
|
async with client.stream("GET", url, headers=headers) as resp:
|
||||||
|
async for chunk in resp.aiter_bytes():
|
||||||
|
yield chunk
|
||||||
|
|
||||||
|
# If it is an M3U8 Playlist, we need to rewrite its internal links to ALSO use the proxy
|
||||||
|
if url.split("?")[0].endswith(".m3u8"):
|
||||||
|
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||||||
|
resp = await client.get(url, headers=headers)
|
||||||
|
lines = resp.text.splitlines()
|
||||||
|
|
||||||
|
base_proxy = f"{str(request.base_url).rstrip('/')}/hls-proxy?url="
|
||||||
|
new_lines = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
if line.startswith("#") or not line.strip():
|
||||||
|
new_lines.append(line)
|
||||||
|
else:
|
||||||
|
# Merge relative paths (e.g. chunk1.ts) with the absolute url
|
||||||
|
absolute_url = urllib.parse.urljoin(url, line.strip())
|
||||||
|
# Wrap it in our proxy path
|
||||||
|
proxy_url = f"{base_proxy}{urllib.parse.quote(absolute_url)}"
|
||||||
|
new_lines.append(proxy_url)
|
||||||
|
|
||||||
|
return Response(
|
||||||
|
content="\n".join(new_lines), media_type="application/vnd.apple.mpegurl"
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Stream the binary video chunk (.ts)
|
||||||
|
return StreamingResponse(stream_generator(), media_type="video/MP2T")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user