mirror of
https://huggingface.co/spaces/Catapang1989/aniscrap
synced 2026-04-17 15:51:46 +00:00
Update main.py
This commit is contained in:
266
main.py
266
main.py
@@ -24,7 +24,6 @@ KITSU_HEADERS = {
|
||||
"Content-Type": "application/vnd.api+json",
|
||||
}
|
||||
|
||||
# Direct relation types (shown first)
|
||||
DIRECT_RELATION_TYPES = {"sequel", "prequel", "parent", "full_story", "side_story"}
|
||||
|
||||
|
||||
@@ -156,10 +155,6 @@ class AnimePahe:
|
||||
# ---------------- KITSU RELATIONS ----------------
|
||||
|
||||
async def _fetch_kitsu_relations(self, kitsu_id: str) -> list:
|
||||
"""
|
||||
Fetch ALL related anime from Kitsu — full chain including all seasons,
|
||||
movies, OVAs, specials. Direct types listed first.
|
||||
"""
|
||||
if kitsu_id in _kitsu_relations_cache:
|
||||
return _kitsu_relations_cache[kitsu_id]
|
||||
|
||||
@@ -179,7 +174,6 @@ class AnimePahe:
|
||||
_kitsu_relations_cache[kitsu_id] = []
|
||||
return []
|
||||
|
||||
# Build lookup of included resources by id
|
||||
included = {}
|
||||
for item in data.get("included", []):
|
||||
included[item["id"]] = item
|
||||
@@ -190,14 +184,12 @@ class AnimePahe:
|
||||
for rel in data.get("data", []):
|
||||
attrs = rel.get("attributes", {})
|
||||
role = (attrs.get("role") or "").lower()
|
||||
|
||||
dest_data = (
|
||||
rel.get("relationships", {}).get("destination", {}).get("data", {})
|
||||
)
|
||||
dest_type = dest_data.get("type", "")
|
||||
dest_id = dest_data.get("id", "")
|
||||
|
||||
# Only include anime destinations
|
||||
if dest_type != "anime":
|
||||
continue
|
||||
|
||||
@@ -230,6 +222,58 @@ class AnimePahe:
|
||||
_kitsu_relations_cache[kitsu_id] = combined
|
||||
return combined
|
||||
|
||||
# ---------------- SHARED RESOLVE HELPERS ----------------
|
||||
|
||||
async def _collect_buttons(self, page) -> list:
|
||||
"""
|
||||
Read all #resolutionMenu buttons.
|
||||
Returns list with embed URL, resolution (int), fansub, audio type.
|
||||
data-audio="jpn" → sub, data-audio="eng" → dub
|
||||
"""
|
||||
buttons = await page.locator("#resolutionMenu button").all()
|
||||
res_data = []
|
||||
for btn in buttons:
|
||||
text = (await btn.inner_text()).strip()
|
||||
res_match = re.search(r"(\d+)", text)
|
||||
audio_lang = (await btn.get_attribute("data-audio") or "jpn").lower()
|
||||
audio_type = "dub" if audio_lang == "eng" else "sub"
|
||||
res_data.append(
|
||||
{
|
||||
"embed": await btn.get_attribute("data-src"),
|
||||
"res": int(res_match.group(1)) if res_match else 720,
|
||||
"fansub": text.split("·")[0].strip() if "·" in text else "Unknown",
|
||||
"audio": audio_type,
|
||||
"audio_lang": audio_lang,
|
||||
}
|
||||
)
|
||||
return res_data
|
||||
|
||||
async def _embed_to_m3u8(self, embed_url: str) -> Optional[str]:
    """
    Open an embed URL in a fresh page and capture the first .m3u8 request.

    Args:
        embed_url: URL of the embed player page to load.

    Returns:
        The URL of the first network request containing ".m3u8", or None
        when ``embed_url`` is empty or no such request is seen within the
        polling window (10 rounds, 0.5 s apart).

    The temporary page is always closed, including when navigation raises.
    """
    if not embed_url:
        # Nothing to navigate to; avoid opening a page just to fail in goto().
        return None

    p = await self.context.new_page()
    m3u8 = None

    def capture(req):
        nonlocal m3u8
        # Keep the FIRST match only; later .m3u8 requests must not
        # overwrite it.
        if m3u8 is None and ".m3u8" in req.url:
            m3u8 = req.url

    p.on("request", capture)
    try:
        await p.set_extra_http_headers({"Referer": BASE_URL})
        await p.goto(embed_url, wait_until="domcontentloaded")
        for _ in range(10):
            if m3u8:
                break
            # Click anything that looks like a play control so that the
            # page issues its playlist request.
            await p.evaluate(
                "document.querySelectorAll('button, video, [class*=play]')"
                ".forEach(el => el.click())"
            )
            await asyncio.sleep(0.5)
    finally:
        await p.close()
    return m3u8
|
||||
|
||||
# ---------------- ANILIST ----------------
|
||||
|
||||
async def _fetch_anilist(self, anilist_id: str) -> dict:
|
||||
@@ -356,19 +400,16 @@ class AnimePahe:
|
||||
if not media:
|
||||
return {"error": "AniList returned no data"}
|
||||
|
||||
# MAL synopsis
|
||||
mal_id = str(media.get("idMal") or "")
|
||||
mal_synopsis = await self._fetch_mal_synopsis(mal_id) if mal_id else None
|
||||
synopsis = mal_synopsis or media.get("description")
|
||||
|
||||
# Format dates
|
||||
def fmt_date(d):
    """
    Format a {"year", "month", "day"} mapping as a dash-joined date string.

    Returns None when ``d`` is falsy or has no year. Components are emitted
    in year-month-day order, each zero-padded to at least two characters,
    stopping at the first missing one so that a date with a day but no
    month is not rendered as a misleading "YYYY-DD".
    """
    if not d or not d.get("year"):
        return None
    parts = []
    for key in ("year", "month", "day"):
        value = d.get(key)
        if not value:
            break
        parts.append(str(value).zfill(2))
    return "-".join(parts)
|
||||
|
||||
# Trailer
|
||||
trailer = None
|
||||
if media.get("trailer"):
|
||||
t = media["trailer"]
|
||||
@@ -377,7 +418,6 @@ class AnimePahe:
|
||||
elif t.get("site") == "dailymotion":
|
||||
trailer = f"https://www.dailymotion.com/video/{t['id']}"
|
||||
|
||||
# Recommendations
|
||||
recommendations = []
|
||||
for node in media.get("recommendations", {}).get("nodes", []):
|
||||
rec = node.get("mediaRecommendation")
|
||||
@@ -398,7 +438,6 @@ class AnimePahe:
|
||||
}
|
||||
)
|
||||
|
||||
# Characters
|
||||
characters = []
|
||||
for edge in media.get("characters", {}).get("edges", []):
|
||||
node = edge.get("node", {})
|
||||
@@ -419,7 +458,6 @@ class AnimePahe:
|
||||
}
|
||||
)
|
||||
|
||||
# Staff
|
||||
staff = []
|
||||
for edge in media.get("staff", {}).get("edges", []):
|
||||
node = edge.get("node", {})
|
||||
@@ -479,7 +517,7 @@ class AnimePahe:
|
||||
],
|
||||
"characters": characters,
|
||||
"staff": staff,
|
||||
"relations": {}, # filled by get_info() from Kitsu
|
||||
"relations": {},
|
||||
"recommendations": recommendations,
|
||||
}
|
||||
|
||||
@@ -497,10 +535,6 @@ class AnimePahe:
|
||||
# ---------------- EPISODES ----------------
|
||||
|
||||
async def get_episodes(self, anime_id: str, p: int = 1, resolve: bool = False):
|
||||
"""
|
||||
Fetch episode list. If resolve=True, also resolve the highest-res
|
||||
stream URL and download link for each episode concurrently.
|
||||
"""
|
||||
data = await self._fetch_json(
|
||||
f"{BASE_URL}/api?m=release&id={anime_id}&sort=episode_desc&page={p}"
|
||||
)
|
||||
@@ -515,10 +549,8 @@ class AnimePahe:
|
||||
if not ep_session:
|
||||
return ep
|
||||
stream = await self._resolve_episode(anime_id, ep_session)
|
||||
ep["url"] = stream.get("url")
|
||||
ep["download"] = stream.get("download")
|
||||
ep["resolution"] = stream.get("resolution")
|
||||
ep["fansub"] = stream.get("fansub")
|
||||
ep["sub"] = stream.get("sub")
|
||||
ep["dub"] = stream.get("dub")
|
||||
return ep
|
||||
|
||||
data["data"] = list(await asyncio.gather(*[enrich(ep) for ep in episodes]))
|
||||
@@ -528,7 +560,6 @@ class AnimePahe:
|
||||
|
||||
async def get_info(self, session: str):
|
||||
try:
|
||||
# Step 1 — scrape IDs from AnimePahe
|
||||
ids = await self._scrape_ids(session)
|
||||
|
||||
anilist_id = ids.get("anilist")
|
||||
@@ -538,11 +569,9 @@ class AnimePahe:
|
||||
"ids": ids,
|
||||
}
|
||||
|
||||
# Step 2 — return from cache if already built
|
||||
if anilist_id in _info_cache:
|
||||
return _info_cache[anilist_id]
|
||||
|
||||
# Step 3 — fetch AniList data + Kitsu relations concurrently
|
||||
kitsu_id = ids.get("kitsu")
|
||||
|
||||
async def empty_relations():
|
||||
@@ -558,10 +587,7 @@ class AnimePahe:
|
||||
if "error" in data:
|
||||
return {"error": data["error"], "ids": ids}
|
||||
|
||||
# Step 4 — inject Kitsu relations under "Related"
|
||||
data["relations"] = {"Related": kitsu_relations} if kitsu_relations else {}
|
||||
|
||||
# Step 5 — inject all IDs
|
||||
data["ids"] = {
|
||||
"animepahe": ids.get("animepahe"),
|
||||
"anilist": anilist_id,
|
||||
@@ -572,7 +598,6 @@ class AnimePahe:
|
||||
"animePlanet": ids.get("animePlanet"),
|
||||
}
|
||||
|
||||
# Step 6 — cache fully merged result
|
||||
_info_cache[anilist_id] = data
|
||||
return data
|
||||
|
||||
@@ -580,13 +605,9 @@ class AnimePahe:
|
||||
print(f"[get_info] ERROR: {e}")
|
||||
return {"error": f"Failed: {str(e)}"}
|
||||
|
||||
# ---------------- RESOLVE (single episode → highest res only) ----------------
|
||||
# ---------------- _resolve_episode (used by get_episodes) ----------------
|
||||
|
||||
async def _resolve_episode(self, anime_session: str, episode_session: str) -> dict:
|
||||
"""
|
||||
Open the play page, collect all resolution buttons, resolve only the
|
||||
highest-resolution embed to its m3u8, and return url + download link.
|
||||
"""
|
||||
play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
|
||||
page = await self.context.new_page()
|
||||
|
||||
@@ -597,82 +618,55 @@ class AnimePahe:
|
||||
state="attached",
|
||||
timeout=15000,
|
||||
)
|
||||
|
||||
buttons = await page.locator("#resolutionMenu button").all()
|
||||
res_data = []
|
||||
for btn in buttons:
|
||||
text = (await btn.inner_text()).strip()
|
||||
res_match = re.search(r"(\d+)", text)
|
||||
res_data.append(
|
||||
{
|
||||
"embed": await btn.get_attribute("data-src"),
|
||||
"res": int(res_match.group(1)) if res_match else 720,
|
||||
"fansub": text.split("·")[0].strip()
|
||||
if "·" in text
|
||||
else "Unknown",
|
||||
}
|
||||
)
|
||||
|
||||
res_data = await self._collect_buttons(page)
|
||||
await page.close()
|
||||
page = None
|
||||
|
||||
if not res_data:
|
||||
return {
|
||||
"url": None,
|
||||
"download": None,
|
||||
"resolution": None,
|
||||
"fansub": None,
|
||||
return {"sub": None, "dub": None}
|
||||
|
||||
subs = [r for r in res_data if r["audio"] == "sub"]
|
||||
dubs = [r for r in res_data if r["audio"] == "dub"]
|
||||
best_sub = max(subs, key=lambda x: x["res"]) if subs else None
|
||||
best_dub = max(dubs, key=lambda x: x["res"]) if dubs else None
|
||||
|
||||
result = {"sub": None, "dub": None}
|
||||
|
||||
async def resolve_one(item, key):
|
||||
m3u8 = await self._embed_to_m3u8(item["embed"])
|
||||
res_str = str(item["res"])
|
||||
result[key] = {
|
||||
"url": m3u8,
|
||||
"download": self._generate_mp4(m3u8, anime_session, res_str),
|
||||
"resolution": res_str,
|
||||
"fansub": item["fansub"],
|
||||
}
|
||||
|
||||
# Pick highest resolution
|
||||
best = max(res_data, key=lambda x: x["res"])
|
||||
tasks = []
|
||||
if best_sub:
|
||||
tasks.append(resolve_one(best_sub, "sub"))
|
||||
if best_dub:
|
||||
tasks.append(resolve_one(best_dub, "dub"))
|
||||
|
||||
# Resolve best embed to m3u8
|
||||
p = await self.context.new_page()
|
||||
m3u8 = None
|
||||
|
||||
def capture(req):
|
||||
nonlocal m3u8
|
||||
if ".m3u8" in req.url:
|
||||
m3u8 = req.url
|
||||
|
||||
p.on("request", capture)
|
||||
try:
|
||||
await p.set_extra_http_headers({"Referer": BASE_URL})
|
||||
await p.goto(best["embed"], wait_until="domcontentloaded")
|
||||
for _ in range(10):
|
||||
if m3u8:
|
||||
break
|
||||
await p.evaluate(
|
||||
"document.querySelectorAll('button, video, [class*=play]')"
|
||||
".forEach(el => el.click())"
|
||||
)
|
||||
await asyncio.sleep(0.5)
|
||||
finally:
|
||||
await p.close()
|
||||
|
||||
res_str = str(best["res"])
|
||||
return {
|
||||
"url": m3u8,
|
||||
"download": self._generate_mp4(m3u8, anime_session, res_str),
|
||||
"resolution": res_str,
|
||||
"fansub": best["fansub"],
|
||||
}
|
||||
await asyncio.gather(*tasks)
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"url": None,
|
||||
"download": None,
|
||||
"resolution": None,
|
||||
"fansub": None,
|
||||
"error": str(e),
|
||||
}
|
||||
print(f"[_resolve_episode] ERROR: {e}")
|
||||
return {"sub": None, "dub": None, "error": str(e)}
|
||||
finally:
|
||||
if page:
|
||||
await page.close()
|
||||
|
||||
# ---------------- RESOLVE ----------------
|
||||
|
||||
async def resolve(self, anime_session: str, episode_session: str):
|
||||
"""Resolve all sources for a single episode (all resolutions)."""
|
||||
"""
|
||||
Resolve highest-res sub and dub for a single episode.
|
||||
Returns:
|
||||
sub: { resolution, fansub, audio, url, download }
|
||||
dub: { resolution, fansub, audio, url, download } or null if no dub
|
||||
"""
|
||||
play_url = f"{BASE_URL}/play/{anime_session}/{episode_session}"
|
||||
page = await self.context.new_page()
|
||||
|
||||
@@ -683,67 +677,55 @@ class AnimePahe:
|
||||
state="attached",
|
||||
timeout=15000,
|
||||
)
|
||||
|
||||
buttons = await page.locator("#resolutionMenu button").all()
|
||||
res_data = []
|
||||
for btn in buttons:
|
||||
text = (await btn.inner_text()).strip()
|
||||
res_match = re.search(r"(\d+)", text)
|
||||
res_data.append(
|
||||
{
|
||||
"embed": await btn.get_attribute("data-src"),
|
||||
"res": res_match.group(1) if res_match else "720",
|
||||
"fansub": text.split("·")[0].strip()
|
||||
if "·" in text
|
||||
else "Unknown",
|
||||
}
|
||||
)
|
||||
|
||||
res_data = await self._collect_buttons(page)
|
||||
await page.close()
|
||||
page = None
|
||||
|
||||
async def get_single_source(item):
|
||||
p = await self.context.new_page()
|
||||
m3u8 = None
|
||||
|
||||
def capture(req):
|
||||
nonlocal m3u8
|
||||
if ".m3u8" in req.url:
|
||||
m3u8 = req.url
|
||||
|
||||
p.on("request", capture)
|
||||
async def resolve_source(item):
|
||||
try:
|
||||
await p.set_extra_http_headers({"Referer": BASE_URL})
|
||||
await p.goto(item["embed"], wait_until="domcontentloaded")
|
||||
for _ in range(10):
|
||||
if m3u8:
|
||||
break
|
||||
await p.evaluate(
|
||||
"document.querySelectorAll('button, video, [class*=play]')"
|
||||
".forEach(el => el.click())"
|
||||
)
|
||||
await asyncio.sleep(0.5)
|
||||
m3u8 = await self._embed_to_m3u8(item["embed"])
|
||||
res_str = str(item["res"])
|
||||
return {
|
||||
"resolution": item["res"],
|
||||
"resolution": res_str,
|
||||
"fansub": item["fansub"],
|
||||
"audio": item["audio"],
|
||||
"audio_lang": item["audio_lang"],
|
||||
"url": m3u8,
|
||||
"download": self._generate_mp4(
|
||||
m3u8, anime_session, item["res"]
|
||||
),
|
||||
"download": self._generate_mp4(m3u8, anime_session, res_str),
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"resolution": item["res"],
|
||||
"resolution": str(item["res"]),
|
||||
"fansub": item["fansub"],
|
||||
"audio": item["audio"],
|
||||
"audio_lang": item["audio_lang"],
|
||||
"url": None,
|
||||
"download": None,
|
||||
"error": str(e),
|
||||
}
|
||||
finally:
|
||||
await p.close()
|
||||
|
||||
sources = await asyncio.gather(*[get_single_source(i) for i in res_data])
|
||||
return {"anime": anime_session, "sources": list(sources)}
|
||||
all_sources = list(
|
||||
await asyncio.gather(*[resolve_source(i) for i in res_data])
|
||||
)
|
||||
|
||||
sub_sources = [s for s in all_sources if s["audio"] == "sub"]
|
||||
dub_sources = [s for s in all_sources if s["audio"] == "dub"]
|
||||
|
||||
def best(sources):
|
||||
if not sources:
|
||||
return None
|
||||
return max(
|
||||
[s for s in sources if s["url"]],
|
||||
key=lambda x: int(x["resolution"]) if x["resolution"] else 0,
|
||||
default=None,
|
||||
)
|
||||
|
||||
return {
|
||||
"anime": anime_session,
|
||||
"episode": episode_session,
|
||||
"sub": best(sub_sources),
|
||||
"dub": best(dub_sources),
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {"error": str(e)}
|
||||
|
||||
Reference in New Issue
Block a user