mirror of
https://huggingface.co/spaces/Catapang1989/movie-hub
synced 2026-04-17 16:11:45 +00:00
Update main.py
Remove series packs and add a content "type" field (series or movie)
This commit is contained in:
205
main.py
205
main.py
@@ -381,6 +381,18 @@ class HDHubEngine:
|
|||||||
if tag_text.lower() not in junk_tags:
|
if tag_text.lower() not in junk_tags:
|
||||||
genres.append(tag_text)
|
genres.append(tag_text)
|
||||||
|
|
||||||
|
# --- DETERMINE IF MOVIE OR SERIES ---
|
||||||
|
content_type = "movie"
|
||||||
|
url_lower = url.lower()
|
||||||
|
if "-series-" in url_lower:
|
||||||
|
content_type = "series"
|
||||||
|
elif any(g.lower() in ["series", "web series", "tv show"] for g in genres):
|
||||||
|
content_type = "series"
|
||||||
|
elif soup.find("div", class_="episode-download-item") or soup.find(
|
||||||
|
id="episodes"
|
||||||
|
):
|
||||||
|
content_type = "series"
|
||||||
|
|
||||||
trailer_url = ""
|
trailer_url = ""
|
||||||
trailer_btn = soup.find(id="trailer-btn")
|
trailer_btn = soup.find(id="trailer-btn")
|
||||||
if trailer_btn and trailer_btn.get("data-trailer-url"):
|
if trailer_btn and trailer_btn.get("data-trailer-url"):
|
||||||
@@ -420,116 +432,133 @@ class HDHubEngine:
|
|||||||
break
|
break
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# PARSE PACKS & EPISODES
|
# PARSE LINKS BASED ON TYPE (Movie vs Series)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
download_links = {"packs": [], "episodes": []}
|
download_links = {}
|
||||||
seen_pack_links = set()
|
|
||||||
seen_episode_links = set()
|
|
||||||
|
|
||||||
# PACKS
|
if content_type == "series":
|
||||||
for pack_item in soup.find_all("div", class_="download-item"):
|
# EPISODES PARSING (Ignores "Packs" completely)
|
||||||
header = pack_item.find("div", class_="download-header")
|
grouped_episodes = {}
|
||||||
if not header:
|
current_season_context = "Unknown Quality"
|
||||||
continue
|
seen_episode_links = set()
|
||||||
pack_name = re.sub(r"\s+", " ", header.text.replace("\n", " ").strip())
|
|
||||||
|
|
||||||
links_div = pack_item.find("div", class_="grid grid-cols-2 gap-2")
|
elements = soup.find_all(
|
||||||
if links_div:
|
lambda tag: (
|
||||||
pack_links_list = []
|
tag.name == "div"
|
||||||
for btn in links_div.find_all("a", class_="btn"):
|
and tag.get("class")
|
||||||
href, server_name = (
|
and (
|
||||||
btn.get("href"),
|
"episode-header" in tag.get("class")
|
||||||
btn.text.strip().replace("\xa0", "").strip(),
|
or "episode-download-item" in tag.get("class")
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- EXCLUDE HUBDRIVE LINKS ---
|
|
||||||
if "hubdrive" in server_name.lower():
|
|
||||||
continue
|
|
||||||
|
|
||||||
uniq_key = (pack_name, server_name)
|
|
||||||
if href and uniq_key not in seen_pack_links:
|
|
||||||
seen_pack_links.add(uniq_key)
|
|
||||||
pack_links_list.append({"server": server_name, "url": href})
|
|
||||||
if pack_links_list:
|
|
||||||
download_links["packs"].append(
|
|
||||||
{"title": pack_name, "links": pack_links_list}
|
|
||||||
)
|
|
||||||
|
|
||||||
# EPISODES
|
|
||||||
grouped_episodes = {}
|
|
||||||
current_season_context = "Unknown Quality"
|
|
||||||
|
|
||||||
elements = soup.find_all(
|
|
||||||
lambda tag: (
|
|
||||||
tag.name == "div"
|
|
||||||
and tag.get("class")
|
|
||||||
and (
|
|
||||||
"episode-header" in tag.get("class")
|
|
||||||
or "episode-download-item" in tag.get("class")
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
for element in elements:
|
for element in elements:
|
||||||
classes = element.get("class", [])
|
classes = element.get("class", [])
|
||||||
|
|
||||||
if "episode-header" in classes:
|
if "episode-header" in classes:
|
||||||
current_season_context = re.sub(r"\s+", " ", element.text.strip())
|
current_season_context = re.sub(
|
||||||
if current_season_context not in grouped_episodes:
|
r"\s+", " ", element.text.strip()
|
||||||
grouped_episodes[current_season_context] = {}
|
)
|
||||||
|
if current_season_context not in grouped_episodes:
|
||||||
|
grouped_episodes[current_season_context] = {}
|
||||||
|
|
||||||
elif "episode-download-item" in classes:
|
elif "episode-download-item" in classes:
|
||||||
ep_info_tag = element.find("span", class_="badge-psa")
|
ep_info_tag = element.find("span", class_="badge-psa")
|
||||||
ep_num_str = re.sub(
|
ep_num_str = re.sub(
|
||||||
r"\s+",
|
r"\s+",
|
||||||
" ",
|
" ",
|
||||||
ep_info_tag.text.strip() if ep_info_tag else "Unknown Episode",
|
ep_info_tag.text.strip()
|
||||||
|
if ep_info_tag
|
||||||
|
else "Unknown Episode",
|
||||||
|
)
|
||||||
|
|
||||||
|
if current_season_context not in grouped_episodes:
|
||||||
|
grouped_episodes[current_season_context] = {}
|
||||||
|
if ep_num_str not in grouped_episodes[current_season_context]:
|
||||||
|
grouped_episodes[current_season_context][ep_num_str] = []
|
||||||
|
|
||||||
|
links_div = element.find("div", class_="episode-links")
|
||||||
|
if links_div:
|
||||||
|
for btn in links_div.find_all("a", class_="btn"):
|
||||||
|
href, server_name = (
|
||||||
|
btn.get("href"),
|
||||||
|
btn.text.strip().replace("\xa0", "").strip(),
|
||||||
|
)
|
||||||
|
|
||||||
|
# EXCLUDE HUBDRIVE LINKS
|
||||||
|
if "hubdrive" in server_name.lower():
|
||||||
|
continue
|
||||||
|
|
||||||
|
uniq_key = (
|
||||||
|
current_season_context,
|
||||||
|
ep_num_str,
|
||||||
|
server_name,
|
||||||
|
)
|
||||||
|
if href and uniq_key not in seen_episode_links:
|
||||||
|
seen_episode_links.add(uniq_key)
|
||||||
|
grouped_episodes[current_season_context][
|
||||||
|
ep_num_str
|
||||||
|
].append({"server": server_name, "url": href})
|
||||||
|
|
||||||
|
# Format array
|
||||||
|
formatted_episodes = []
|
||||||
|
for season_quality, episodes_dict in grouped_episodes.items():
|
||||||
|
if not episodes_dict:
|
||||||
|
continue
|
||||||
|
ep_list = []
|
||||||
|
for ep_name, links in episodes_dict.items():
|
||||||
|
if links: # Remove empty episodes if HubDrive was the only link
|
||||||
|
ep_list.append({"episode": ep_name, "links": links})
|
||||||
|
|
||||||
|
if ep_list:
|
||||||
|
formatted_episodes.append(
|
||||||
|
{"season_quality": season_quality, "episodes": ep_list}
|
||||||
|
)
|
||||||
|
|
||||||
|
download_links["episodes"] = formatted_episodes
|
||||||
|
|
||||||
|
else:
|
||||||
|
# MOVIE PARSING (Grabs direct files/qualities)
|
||||||
|
movie_links = []
|
||||||
|
seen_movie_links = set()
|
||||||
|
|
||||||
|
for pack_item in soup.find_all("div", class_="download-item"):
|
||||||
|
header = pack_item.find("div", class_="download-header")
|
||||||
|
if not header:
|
||||||
|
continue
|
||||||
|
quality_name = re.sub(
|
||||||
|
r"\s+", " ", header.text.replace("\n", " ").strip()
|
||||||
)
|
)
|
||||||
|
|
||||||
if current_season_context not in grouped_episodes:
|
links_div = pack_item.find("div", class_="grid grid-cols-2 gap-2")
|
||||||
grouped_episodes[current_season_context] = {}
|
|
||||||
if ep_num_str not in grouped_episodes[current_season_context]:
|
|
||||||
grouped_episodes[current_season_context][ep_num_str] = []
|
|
||||||
|
|
||||||
links_div = element.find("div", class_="episode-links")
|
|
||||||
if links_div:
|
if links_div:
|
||||||
|
btn_links = []
|
||||||
for btn in links_div.find_all("a", class_="btn"):
|
for btn in links_div.find_all("a", class_="btn"):
|
||||||
href, server_name = (
|
href, server_name = (
|
||||||
btn.get("href"),
|
btn.get("href"),
|
||||||
btn.text.strip().replace("\xa0", "").strip(),
|
btn.text.strip().replace("\xa0", "").strip(),
|
||||||
)
|
)
|
||||||
|
|
||||||
# --- EXCLUDE HUBDRIVE LINKS ---
|
# EXCLUDE HUBDRIVE LINKS
|
||||||
if "hubdrive" in server_name.lower():
|
if "hubdrive" in server_name.lower():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
uniq_key = (current_season_context, ep_num_str, server_name)
|
uniq_key = (quality_name, server_name)
|
||||||
if href and uniq_key not in seen_episode_links:
|
if href and uniq_key not in seen_movie_links:
|
||||||
seen_episode_links.add(uniq_key)
|
seen_movie_links.add(uniq_key)
|
||||||
grouped_episodes[current_season_context][
|
btn_links.append({"server": server_name, "url": href})
|
||||||
ep_num_str
|
|
||||||
].append({"server": server_name, "url": href})
|
|
||||||
|
|
||||||
# Format array
|
if btn_links:
|
||||||
formatted_episodes = []
|
movie_links.append(
|
||||||
for season_quality, episodes_dict in grouped_episodes.items():
|
{"quality": quality_name, "links": btn_links}
|
||||||
if not episodes_dict:
|
)
|
||||||
continue
|
|
||||||
ep_list = []
|
|
||||||
for ep_name, links in episodes_dict.items():
|
|
||||||
# Only add the episode to the final array if there is at least one link (removes empty HubDrive-only episodes if any exist)
|
|
||||||
if links:
|
|
||||||
ep_list.append({"episode": ep_name, "links": links})
|
|
||||||
|
|
||||||
if ep_list:
|
download_links["movie_links"] = movie_links
|
||||||
formatted_episodes.append(
|
|
||||||
{"season_quality": season_quality, "episodes": ep_list}
|
|
||||||
)
|
|
||||||
|
|
||||||
download_links["episodes"] = formatted_episodes
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"title": title,
|
"title": title,
|
||||||
|
"type": content_type,
|
||||||
"year": year,
|
"year": year,
|
||||||
"score": score,
|
"score": score,
|
||||||
"genres": genres,
|
"genres": genres,
|
||||||
@@ -554,7 +583,7 @@ async def lifespan(app: FastAPI):
|
|||||||
await engine.client.aclose()
|
await engine.client.aclose()
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI(lifespan=lifespan, title="HDHub Scraper & Resolver v4.3")
|
app = FastAPI(lifespan=lifespan, title="HDHub Scraper & Resolver v4.4")
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
@@ -562,8 +591,8 @@ async def root_directory():
|
|||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{
|
{
|
||||||
"name": "HDHub API Scraper & Resolver",
|
"name": "HDHub API Scraper & Resolver",
|
||||||
"version": "4.3",
|
"version": "4.4",
|
||||||
"features": "Added Search Endpoint. Native extraction for Score/Year/Genres/Cast. Auto-Pixeldrain resolver. HubDrive filtered.",
|
"features": "Type Detection (Movie/Series). Removed Series Packs. Added Search. HubDrive Filtered. Pixeldrain Auto-Converter.",
|
||||||
"endpoints": {
|
"endpoints": {
|
||||||
"/home": {
|
"/home": {
|
||||||
"description": "Fetch homepage or category-specific movie/series lists.",
|
"description": "Fetch homepage or category-specific movie/series lists.",
|
||||||
@@ -580,7 +609,7 @@ async def root_directory():
|
|||||||
"example": "/search?query=batman",
|
"example": "/search?query=batman",
|
||||||
},
|
},
|
||||||
"/info": {
|
"/info": {
|
||||||
"description": "Scrape full info (Metadata, Packs, Episodes). HubDrive links are automatically hidden.",
|
"description": "Scrape full info. Automatically detects Movie or Series and hides series 'Packs'. HubDrive links are hidden.",
|
||||||
"method": "GET",
|
"method": "GET",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"url": "(Required) The full HDHub post URL obtained from /home or /search."
|
"url": "(Required) The full HDHub post URL obtained from /home or /search."
|
||||||
|
|||||||
Reference in New Issue
Block a user