mirror of
https://huggingface.co/spaces/Catapang1989/aniscrap
synced 2026-04-17 15:51:46 +00:00
Update main.py
This commit is contained in:
179
main.py
179
main.py
@@ -5,10 +5,12 @@ import os
|
|||||||
import httpx
|
import httpx
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI, Query, Request
|
||||||
|
from fastapi.responses import StreamingResponse, Response, JSONResponse
|
||||||
from playwright.async_api import async_playwright, BrowserContext
|
from playwright.async_api import async_playwright, BrowserContext
|
||||||
|
|
||||||
BASE_URL = "https://animepahe.si"
|
BASE_URL = "https://animepahe.si"
|
||||||
|
ANIWATCHTV_BASE = "https://aniwatchtv.to"
|
||||||
ANILIST_API = "https://graphql.anilist.co"
|
ANILIST_API = "https://graphql.anilist.co"
|
||||||
JIKAN_API = "https://api.jikan.moe/v4"
|
JIKAN_API = "https://api.jikan.moe/v4"
|
||||||
IS_HEADLESS = os.environ.get("HEADLESS", "true").lower() == "true"
|
IS_HEADLESS = os.environ.get("HEADLESS", "true").lower() == "true"
|
||||||
@@ -53,6 +55,10 @@ class AnimePahe:
|
|||||||
|
|
||||||
async def _intercept_assets(self, route):
|
async def _intercept_assets(self, route):
|
||||||
url = route.request.url.lower()
|
url = route.request.url.lower()
|
||||||
|
# Allow all requests from aniwatchtv so season posters can load
|
||||||
|
if "aniwatchtv.to" in url:
|
||||||
|
await route.continue_()
|
||||||
|
return
|
||||||
if any(ad in url for ad in self.ad_domains) or url.endswith(
|
if any(ad in url for ad in self.ad_domains) or url.endswith(
|
||||||
(".png", ".jpg", ".jpeg", ".webp", ".woff")
|
(".png", ".jpg", ".jpeg", ".webp", ".woff")
|
||||||
):
|
):
|
||||||
@@ -619,6 +625,108 @@ class AnimePahe:
|
|||||||
if page:
|
if page:
|
||||||
await page.close()
|
await page.close()
|
||||||
|
|
||||||
|
# ---------------- SEASONS ----------------
|
||||||
|
|
||||||
|
async def get_seasons(self, anime_id: str) -> dict:
    """
    Scrape the 'More Seasons' section of an aniwatchtv.to anime page.

    Opens a new page on the existing Playwright browser context, loads
    ``{ANIWATCHTV_BASE}/{anime_id}`` and collects every unique link found
    in the seasons block (the entry for the current page is included).

    Args:
        anime_id: Full page slug, e.g.
            ``jujutsu-kaisen-the-culling-game-part-1-20401``.

    Returns:
        A dict with ``id`` (the slug passed in), ``total`` and ``seasons``.
        Each season is a dict with ``title``, ``id``, ``numericId``,
        ``url`` and ``poster``. If anything raises while scraping, the
        same shape is returned with ``total`` 0, an empty ``seasons`` list
        and an additional ``error`` message.
    """
    # The script is a plain (non f-) string: values are handed to the page
    # as an argument of page.evaluate() instead of being pasted into the
    # JavaScript source, so no input can break or inject into the script.
    seasons_script = r"""(base) => {
        const results = [];
        const seen = new Set();

        // Strategy 1: dedicated season list block (.os-list or similar)
        const block =
            document.querySelector('.os-list') ||
            document.querySelector('.seasons-block') ||
            document.querySelector('[class*="os-list"]') ||
            document.querySelector('[class*="season-list"]');

        // Strategy 2: find a heading whose whole text is "More Seasons"
        // and walk up (at most 5 levels) to the first ancestor holding
        // links. The match is anchored: an unanchored test also matches
        // <body>/<html>, whose innerText contains the heading text, and
        // would select the entire document.
        const findFallback = () => {
            for (const el of document.querySelectorAll('*')) {
                const text = (el.innerText || '').trim();
                if (!/^more\s+seasons?\s*:?$/i.test(text)) continue;
                let p = el.parentElement;
                for (let i = 0; i < 5 && p; i++) {
                    if (p.querySelectorAll('a[href]').length > 0) return p;
                    p = p.parentElement;
                }
            }
            return null;
        };

        // Only pay for the full-DOM scan when strategy 1 found nothing.
        const container = block || findFallback();
        if (!container) return results;

        for (const a of container.querySelectorAll('a[href]')) {
            const href = a.getAttribute('href') || '';
            const fullUrl = href.startsWith('http') ? href
                          : href.startsWith('/') ? base + href
                          : null;
            if (!fullUrl) continue;

            const slug = fullUrl.replace(/\/$/, '').split('/').pop();
            // Include ALL slugs (current page included); dedupe only
            if (!slug || seen.has(slug)) continue;
            seen.add(slug);

            const numericMatch = slug.match(/-(\d+)$/);
            const numericId = numericMatch ? numericMatch[1] : null;

            const titleEl = a.querySelector('span, [class*="title"], [class*="name"]');
            const title = (titleEl?.innerText?.trim() || a.innerText?.trim() || slug);

            // Poster is in a sibling/child div.season-poster as a CSS background-image
            const posterEl = a.querySelector('.season-poster')
                || a.closest('li, div')?.querySelector('.season-poster');
            let poster = null;
            if (posterEl) {
                const bg = posterEl.style.backgroundImage
                    || window.getComputedStyle(posterEl).backgroundImage;
                // Strip only the quotes around the url(...) value; the
                // slashes are part of the URL and must be kept.
                const src = bg.split('url(').pop().split(')')[0].replace(/['"]/g, '').trim();
                if (src && src.startsWith('http')) poster = src;
            }

            results.push({ title, id: slug, numericId, url: fullUrl, poster });
        }

        return results;
    }"""

    url = f"{ANIWATCHTV_BASE}/{anime_id}"
    page = await self.context.new_page()

    try:
        await page.goto(url, wait_until="domcontentloaded", timeout=30000)

        # Short wait for lazy-loaded images and JS rendering
        await asyncio.sleep(1)

        # Best-effort probe: wait for the first selector that shows up.
        # "main" is the last resort so the scrape still runs on pages
        # without a season block.
        for selector in (".os-list", ".seasons-block", "[class*='season']", "main"):
            try:
                await page.wait_for_selector(selector, timeout=5000)
                break
            except Exception:
                # Not a bare `except:` so task cancellation and
                # KeyboardInterrupt still propagate; a timeout here just
                # means "try the next selector".
                continue

        seasons = await page.evaluate(seasons_script, ANIWATCHTV_BASE)

        return {
            "id": anime_id,
            "total": len(seasons),
            "seasons": seasons,
        }

    except Exception as e:
        print(f"[get_seasons] ERROR: {e}")
        return {"id": anime_id, "total": 0, "seasons": [], "error": str(e)}
    finally:
        # Always release the tab, whether scraping succeeded or not.
        await page.close()
|
||||||
|
|
||||||
|
|
||||||
pahe = AnimePahe()
|
pahe = AnimePahe()
|
||||||
|
|
||||||
@@ -633,6 +741,23 @@ async def lifespan(app: FastAPI):
|
|||||||
app = FastAPI(lifespan=lifespan)
|
app = FastAPI(lifespan=lifespan)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/", response_class=JSONResponse)
async def root():
    """Report service status together with the list of available routes."""
    available_routes = [
        "/search?q=:title",
        "/latest?p=:page",
        "/info/:session",
        "/ids/:session",
        "/episodes/:session?p=:page&resolve=false|true",
        "/resolve/:animeSession/:episodeSession",
        "/seasons/:animeId - e.g. /seasons/jujutsu-kaisen-the-culling-game-part-1-20401",
        "/poster?url=:cdnImageUrl - proxy hotlink-protected poster images",
    ]
    return {"status": "ok", "routes": available_routes}
|
||||||
|
|
||||||
|
|
||||||
@app.get("/search")
async def api_search(q: str):
    """Run a title search by delegating the query string ``q`` to ``pahe.search``."""
    results = await pahe.search(q)
    return results
|
||||||
@@ -663,6 +788,58 @@ async def api_resolve(anime: str, episode: str):
|
|||||||
return await pahe.resolve(anime, episode)
|
return await pahe.resolve(anime, episode)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/seasons/{anime_id:path}")
async def api_seasons(anime_id: str, request: Request):
    """
    Scrape the More Seasons section from aniwatchtv.to.

    Example:
        GET /seasons/jujutsu-kaisen-the-culling-game-part-1-20401

    Returns:
        id        - the slug passed in
        total     - number of seasons found
        seasons[] - list of { title, id, numericId, url, poster, posterProxied }

    ``posterProxied`` is a link to this service's ``/poster`` endpoint for
    the season's poster, or ``None`` when the season has no poster.
    """
    from urllib.parse import quote

    data = await pahe.get_seasons(anime_id)
    base_url = str(request.base_url).rstrip("/")
    for season in data.get("seasons", []):
        poster = season.get("poster")
        if poster:
            # Percent-encode the whole poster URL: left raw, any '?', '&'
            # or '#' inside it would be parsed as part of *our* query
            # string and the proxied link would be truncated.
            season["posterProxied"] = f"{base_url}/poster?url={quote(poster, safe='')}"
        else:
            season["posterProxied"] = None
    return data
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/poster")
async def api_poster(url: str = Query(..., description="CDN image URL to proxy")):
    """
    Proxy a hotlink-protected poster image with the correct Referer header.
    Use this to display season/anime posters in the browser.

    Example:
        GET /poster?url=https://cdn.noitatnemucod.net/thumbnail/100x200/100/abc123.jpg

    Responses:
        200 - the image bytes, with the upstream content type
        400 - ``url`` is not an absolute http(s) URL
        502 - the upstream fetch failed or did not return an image
    """
    from urllib.parse import urlsplit

    # Reject anything that is not an absolute http(s) URL before issuing
    # a request on the caller's behalf.
    try:
        target = urlsplit(url)
        is_valid = target.scheme in ("http", "https") and bool(target.hostname)
    except ValueError:
        is_valid = False
    if not is_valid:
        return Response(
            content="Invalid image URL",
            status_code=400,
            media_type="text/plain",
        )

    # NOTE(review): this endpoint still fetches from any host the server can
    # reach (SSRF surface). Consider an allow-list of poster CDN hosts.
    try:
        async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
            resp = await client.get(
                url,
                headers={
                    "Referer": "https://aniwatchtv.to/",
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122 Safari/537.36",
                },
            )
            resp.raise_for_status()
            content_type = resp.headers.get("content-type", "image/jpeg")
            # Only relay images: passing through e.g. text/html would let
            # arbitrary third-party markup be served from this origin.
            if not content_type.lower().startswith("image/"):
                return Response(
                    content="Upstream response is not an image",
                    status_code=502,
                    media_type="text/plain",
                )
            return Response(content=resp.content, media_type=content_type)
    except Exception as e:
        return Response(
            content=f"Failed to fetch image: {e}",
            status_code=502,
            media_type="text/plain",
        )
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user