Update main.py

Remove series "packs" parsing and add content-type detection (series or movie)
This commit is contained in:
Aira Catapang
2026-04-04 09:27:22 +00:00
committed by system
parent 88cc85b98b
commit 330a833e00

205
main.py
View File

@@ -381,6 +381,18 @@ class HDHubEngine:
if tag_text.lower() not in junk_tags: if tag_text.lower() not in junk_tags:
genres.append(tag_text) genres.append(tag_text)
# --- DETERMINE IF MOVIE OR SERIES ---
content_type = "movie"
url_lower = url.lower()
if "-series-" in url_lower:
content_type = "series"
elif any(g.lower() in ["series", "web series", "tv show"] for g in genres):
content_type = "series"
elif soup.find("div", class_="episode-download-item") or soup.find(
id="episodes"
):
content_type = "series"
trailer_url = "" trailer_url = ""
trailer_btn = soup.find(id="trailer-btn") trailer_btn = soup.find(id="trailer-btn")
if trailer_btn and trailer_btn.get("data-trailer-url"): if trailer_btn and trailer_btn.get("data-trailer-url"):
@@ -420,116 +432,133 @@ class HDHubEngine:
break break
# --------------------------------------------------------- # ---------------------------------------------------------
# PARSE PACKS & EPISODES # PARSE LINKS BASED ON TYPE (Movie vs Series)
# --------------------------------------------------------- # ---------------------------------------------------------
download_links = {"packs": [], "episodes": []} download_links = {}
seen_pack_links = set()
seen_episode_links = set()
# PACKS if content_type == "series":
for pack_item in soup.find_all("div", class_="download-item"): # EPISODES PARSING (Ignores "Packs" completely)
header = pack_item.find("div", class_="download-header") grouped_episodes = {}
if not header: current_season_context = "Unknown Quality"
continue seen_episode_links = set()
pack_name = re.sub(r"\s+", " ", header.text.replace("\n", " ").strip())
links_div = pack_item.find("div", class_="grid grid-cols-2 gap-2") elements = soup.find_all(
if links_div: lambda tag: (
pack_links_list = [] tag.name == "div"
for btn in links_div.find_all("a", class_="btn"): and tag.get("class")
href, server_name = ( and (
btn.get("href"), "episode-header" in tag.get("class")
btn.text.strip().replace("\xa0", "").strip(), or "episode-download-item" in tag.get("class")
) )
# --- EXCLUDE HUBDRIVE LINKS ---
if "hubdrive" in server_name.lower():
continue
uniq_key = (pack_name, server_name)
if href and uniq_key not in seen_pack_links:
seen_pack_links.add(uniq_key)
pack_links_list.append({"server": server_name, "url": href})
if pack_links_list:
download_links["packs"].append(
{"title": pack_name, "links": pack_links_list}
)
# EPISODES
grouped_episodes = {}
current_season_context = "Unknown Quality"
elements = soup.find_all(
lambda tag: (
tag.name == "div"
and tag.get("class")
and (
"episode-header" in tag.get("class")
or "episode-download-item" in tag.get("class")
) )
) )
)
for element in elements: for element in elements:
classes = element.get("class", []) classes = element.get("class", [])
if "episode-header" in classes: if "episode-header" in classes:
current_season_context = re.sub(r"\s+", " ", element.text.strip()) current_season_context = re.sub(
if current_season_context not in grouped_episodes: r"\s+", " ", element.text.strip()
grouped_episodes[current_season_context] = {} )
if current_season_context not in grouped_episodes:
grouped_episodes[current_season_context] = {}
elif "episode-download-item" in classes: elif "episode-download-item" in classes:
ep_info_tag = element.find("span", class_="badge-psa") ep_info_tag = element.find("span", class_="badge-psa")
ep_num_str = re.sub( ep_num_str = re.sub(
r"\s+", r"\s+",
" ", " ",
ep_info_tag.text.strip() if ep_info_tag else "Unknown Episode", ep_info_tag.text.strip()
if ep_info_tag
else "Unknown Episode",
)
if current_season_context not in grouped_episodes:
grouped_episodes[current_season_context] = {}
if ep_num_str not in grouped_episodes[current_season_context]:
grouped_episodes[current_season_context][ep_num_str] = []
links_div = element.find("div", class_="episode-links")
if links_div:
for btn in links_div.find_all("a", class_="btn"):
href, server_name = (
btn.get("href"),
btn.text.strip().replace("\xa0", "").strip(),
)
# EXCLUDE HUBDRIVE LINKS
if "hubdrive" in server_name.lower():
continue
uniq_key = (
current_season_context,
ep_num_str,
server_name,
)
if href and uniq_key not in seen_episode_links:
seen_episode_links.add(uniq_key)
grouped_episodes[current_season_context][
ep_num_str
].append({"server": server_name, "url": href})
# Format array
formatted_episodes = []
for season_quality, episodes_dict in grouped_episodes.items():
if not episodes_dict:
continue
ep_list = []
for ep_name, links in episodes_dict.items():
if links: # Remove empty episodes if HubDrive was the only link
ep_list.append({"episode": ep_name, "links": links})
if ep_list:
formatted_episodes.append(
{"season_quality": season_quality, "episodes": ep_list}
)
download_links["episodes"] = formatted_episodes
else:
# MOVIE PARSING (Grabs direct files/qualities)
movie_links = []
seen_movie_links = set()
for pack_item in soup.find_all("div", class_="download-item"):
header = pack_item.find("div", class_="download-header")
if not header:
continue
quality_name = re.sub(
r"\s+", " ", header.text.replace("\n", " ").strip()
) )
if current_season_context not in grouped_episodes: links_div = pack_item.find("div", class_="grid grid-cols-2 gap-2")
grouped_episodes[current_season_context] = {}
if ep_num_str not in grouped_episodes[current_season_context]:
grouped_episodes[current_season_context][ep_num_str] = []
links_div = element.find("div", class_="episode-links")
if links_div: if links_div:
btn_links = []
for btn in links_div.find_all("a", class_="btn"): for btn in links_div.find_all("a", class_="btn"):
href, server_name = ( href, server_name = (
btn.get("href"), btn.get("href"),
btn.text.strip().replace("\xa0", "").strip(), btn.text.strip().replace("\xa0", "").strip(),
) )
# --- EXCLUDE HUBDRIVE LINKS --- # EXCLUDE HUBDRIVE LINKS
if "hubdrive" in server_name.lower(): if "hubdrive" in server_name.lower():
continue continue
uniq_key = (current_season_context, ep_num_str, server_name) uniq_key = (quality_name, server_name)
if href and uniq_key not in seen_episode_links: if href and uniq_key not in seen_movie_links:
seen_episode_links.add(uniq_key) seen_movie_links.add(uniq_key)
grouped_episodes[current_season_context][ btn_links.append({"server": server_name, "url": href})
ep_num_str
].append({"server": server_name, "url": href})
# Format array if btn_links:
formatted_episodes = [] movie_links.append(
for season_quality, episodes_dict in grouped_episodes.items(): {"quality": quality_name, "links": btn_links}
if not episodes_dict: )
continue
ep_list = []
for ep_name, links in episodes_dict.items():
# Only add the episode to the final array if there is at least one link (removes empty HubDrive-only episodes if any exist)
if links:
ep_list.append({"episode": ep_name, "links": links})
if ep_list: download_links["movie_links"] = movie_links
formatted_episodes.append(
{"season_quality": season_quality, "episodes": ep_list}
)
download_links["episodes"] = formatted_episodes
return { return {
"title": title, "title": title,
"type": content_type,
"year": year, "year": year,
"score": score, "score": score,
"genres": genres, "genres": genres,
@@ -554,7 +583,7 @@ async def lifespan(app: FastAPI):
await engine.client.aclose() await engine.client.aclose()
app = FastAPI(lifespan=lifespan, title="HDHub Scraper & Resolver v4.3") app = FastAPI(lifespan=lifespan, title="HDHub Scraper & Resolver v4.4")
@app.get("/") @app.get("/")
@@ -562,8 +591,8 @@ async def root_directory():
return JSONResponse( return JSONResponse(
{ {
"name": "HDHub API Scraper & Resolver", "name": "HDHub API Scraper & Resolver",
"version": "4.3", "version": "4.4",
"features": "Added Search Endpoint. Native extraction for Score/Year/Genres/Cast. Auto-Pixeldrain resolver. HubDrive filtered.", "features": "Type Detection (Movie/Series). Removed Series Packs. Added Search. HubDrive Filtered. Pixeldrain Auto-Converter.",
"endpoints": { "endpoints": {
"/home": { "/home": {
"description": "Fetch homepage or category-specific movie/series lists.", "description": "Fetch homepage or category-specific movie/series lists.",
@@ -580,7 +609,7 @@ async def root_directory():
"example": "/search?query=batman", "example": "/search?query=batman",
}, },
"/info": { "/info": {
"description": "Scrape full info (Metadata, Packs, Episodes). HubDrive links are automatically hidden.", "description": "Scrape full info. Automatically detects Movie or Series and hides series 'Packs'. HubDrive links are hidden.",
"method": "GET", "method": "GET",
"parameters": { "parameters": {
"url": "(Required) The full HDHub post URL obtained from /home or /search." "url": "(Required) The full HDHub post URL obtained from /home or /search."