Update main.py

Remove series "packs" parsing and add content-type detection (series or movie)
This commit is contained in:
Aira Catapang
2026-04-04 09:27:22 +00:00
committed by system
parent 88cc85b98b
commit 330a833e00

205
main.py
View File

@@ -381,6 +381,18 @@ class HDHubEngine:
if tag_text.lower() not in junk_tags: if tag_text.lower() not in junk_tags:
genres.append(tag_text) genres.append(tag_text)
# --- DETERMINE IF MOVIE OR SERIES ---
content_type = "movie"
url_lower = url.lower()
if "-series-" in url_lower:
content_type = "series"
elif any(g.lower() in ["series", "web series", "tv show"] for g in genres):
content_type = "series"
elif soup.find("div", class_="episode-download-item") or soup.find(
id="episodes"
):
content_type = "series"
trailer_url = "" trailer_url = ""
trailer_btn = soup.find(id="trailer-btn") trailer_btn = soup.find(id="trailer-btn")
if trailer_btn and trailer_btn.get("data-trailer-url"): if trailer_btn and trailer_btn.get("data-trailer-url"):
@@ -420,116 +432,133 @@ class HDHubEngine:
break break
# --------------------------------------------------------- # ---------------------------------------------------------
# PARSE PACKS & EPISODES # PARSE LINKS BASED ON TYPE (Movie vs Series)
# --------------------------------------------------------- # ---------------------------------------------------------
download_links = {"packs": [], "episodes": []} download_links = {}
seen_pack_links = set()
seen_episode_links = set()
# PACKS if content_type == "series":
for pack_item in soup.find_all("div", class_="download-item"): # EPISODES PARSING (Ignores "Packs" completely)
header = pack_item.find("div", class_="download-header") grouped_episodes = {}
if not header: current_season_context = "Unknown Quality"
continue seen_episode_links = set()
pack_name = re.sub(r"\s+", " ", header.text.replace("\n", " ").strip())
links_div = pack_item.find("div", class_="grid grid-cols-2 gap-2") elements = soup.find_all(
if links_div: lambda tag: (
pack_links_list = [] tag.name == "div"
for btn in links_div.find_all("a", class_="btn"): and tag.get("class")
href, server_name = ( and (
btn.get("href"), "episode-header" in tag.get("class")
btn.text.strip().replace("\xa0", "").strip(), or "episode-download-item" in tag.get("class")
) )
# --- EXCLUDE HUBDRIVE LINKS ---
if "hubdrive" in server_name.lower():
continue
uniq_key = (pack_name, server_name)
if href and uniq_key not in seen_pack_links:
seen_pack_links.add(uniq_key)
pack_links_list.append({"server": server_name, "url": href})
if pack_links_list:
download_links["packs"].append(
{"title": pack_name, "links": pack_links_list}
)
# EPISODES
grouped_episodes = {}
current_season_context = "Unknown Quality"
elements = soup.find_all(
lambda tag: (
tag.name == "div"
and tag.get("class")
and (
"episode-header" in tag.get("class")
or "episode-download-item" in tag.get("class")
) )
) )
)
for element in elements: for element in elements:
classes = element.get("class", []) classes = element.get("class", [])
if "episode-header" in classes: if "episode-header" in classes:
current_season_context = re.sub(r"\s+", " ", element.text.strip()) current_season_context = re.sub(
if current_season_context not in grouped_episodes: r"\s+", " ", element.text.strip()
grouped_episodes[current_season_context] = {} )
if current_season_context not in grouped_episodes:
grouped_episodes[current_season_context] = {}
elif "episode-download-item" in classes: elif "episode-download-item" in classes:
ep_info_tag = element.find("span", class_="badge-psa") ep_info_tag = element.find("span", class_="badge-psa")
ep_num_str = re.sub( ep_num_str = re.sub(
r"\s+", r"\s+",
" ", " ",
ep_info_tag.text.strip() if ep_info_tag else "Unknown Episode", ep_info_tag.text.strip()
if ep_info_tag
else "Unknown Episode",
)
if current_season_context not in grouped_episodes:
grouped_episodes[current_season_context] = {}
if ep_num_str not in grouped_episodes[current_season_context]:
grouped_episodes[current_season_context][ep_num_str] = []
links_div = element.find("div", class_="episode-links")
if links_div:
for btn in links_div.find_all("a", class_="btn"):
href, server_name = (
btn.get("href"),
btn.text.strip().replace("\xa0", "").strip(),
)
# EXCLUDE HUBDRIVE LINKS
if "hubdrive" in server_name.lower():
continue
uniq_key = (
current_season_context,
ep_num_str,
server_name,
)
if href and uniq_key not in seen_episode_links:
seen_episode_links.add(uniq_key)
grouped_episodes[current_season_context][
ep_num_str
].append({"server": server_name, "url": href})
# Format array
formatted_episodes = []
for season_quality, episodes_dict in grouped_episodes.items():
if not episodes_dict:
continue
ep_list = []
for ep_name, links in episodes_dict.items():
if links: # Remove empty episodes if HubDrive was the only link
ep_list.append({"episode": ep_name, "links": links})
if ep_list:
formatted_episodes.append(
{"season_quality": season_quality, "episodes": ep_list}
)
download_links["episodes"] = formatted_episodes
else:
# MOVIE PARSING (Grabs direct files/qualities)
movie_links = []
seen_movie_links = set()
for pack_item in soup.find_all("div", class_="download-item"):
header = pack_item.find("div", class_="download-header")
if not header:
continue
quality_name = re.sub(
r"\s+", " ", header.text.replace("\n", " ").strip()
) )
if current_season_context not in grouped_episodes: links_div = pack_item.find("div", class_="grid grid-cols-2 gap-2")
grouped_episodes[current_season_context] = {}
if ep_num_str not in grouped_episodes[current_season_context]:
grouped_episodes[current_season_context][ep_num_str] = []
links_div = element.find("div", class_="episode-links")
if links_div: if links_div:
btn_links = []
for btn in links_div.find_all("a", class_="btn"): for btn in links_div.find_all("a", class_="btn"):
href, server_name = ( href, server_name = (
btn.get("href"), btn.get("href"),
btn.text.strip().replace("\xa0", "").strip(), btn.text.strip().replace("\xa0", "").strip(),
) )
# --- EXCLUDE HUBDRIVE LINKS --- # EXCLUDE HUBDRIVE LINKS
if "hubdrive" in server_name.lower(): if "hubdrive" in server_name.lower():
continue continue
uniq_key = (current_season_context, ep_num_str, server_name) uniq_key = (quality_name, server_name)
if href and uniq_key not in seen_episode_links: if href and uniq_key not in seen_movie_links:
seen_episode_links.add(uniq_key) seen_movie_links.add(uniq_key)
grouped_episodes[current_season_context][ btn_links.append({"server": server_name, "url": href})
ep_num_str
].append({"server": server_name, "url": href})
# Format array if btn_links:
formatted_episodes = [] movie_links.append(
for season_quality, episodes_dict in grouped_episodes.items(): {"quality": quality_name, "links": btn_links}
if not episodes_dict: )
continue
ep_list = []
for ep_name, links in episodes_dict.items():
# Only add the episode to the final array if there is at least one link (removes empty HubDrive-only episodes if any exist)
if links:
ep_list.append({"episode": ep_name, "links": links})
if ep_list: download_links["movie_links"] = movie_links
formatted_episodes.append(
{"season_quality": season_quality, "episodes": ep_list}
)
download_links["episodes"] = formatted_episodes
return { return {
"title": title, "title": title,
"type": content_type,
"year": year, "year": year,
"score": score, "score": score,
"genres": genres, "genres": genres,
@@ -554,7 +583,7 @@ async def lifespan(app: FastAPI):
await engine.client.aclose() await engine.client.aclose()
app = FastAPI(lifespan=lifespan, title="HDHub Scraper & Resolver v4.3") app = FastAPI(lifespan=lifespan, title="HDHub Scraper & Resolver v4.4")
@app.get("/") @app.get("/")
@@ -562,8 +591,8 @@ async def root_directory():
return JSONResponse( return JSONResponse(
{ {
"name": "HDHub API Scraper & Resolver", "name": "HDHub API Scraper & Resolver",
"version": "4.3", "version": "4.4",
"features": "Added Search Endpoint. Native extraction for Score/Year/Genres/Cast. Auto-Pixeldrain resolver. HubDrive filtered.", "features": "Type Detection (Movie/Series). Removed Series Packs. Added Search. HubDrive Filtered. Pixeldrain Auto-Converter.",
"endpoints": { "endpoints": {
"/home": { "/home": {
"description": "Fetch homepage or category-specific movie/series lists.", "description": "Fetch homepage or category-specific movie/series lists.",
@@ -580,7 +609,7 @@ async def root_directory():
"example": "/search?query=batman", "example": "/search?query=batman",
}, },
"/info": { "/info": {
"description": "Scrape full info (Metadata, Packs, Episodes). HubDrive links are automatically hidden.", "description": "Scrape full info. Automatically detects Movie or Series and hides series 'Packs'. HubDrive links are hidden.",
"method": "GET", "method": "GET",
"parameters": { "parameters": {
"url": "(Required) The full HDHub post URL obtained from /home or /search." "url": "(Required) The full HDHub post URL obtained from /home or /search."