Harden AnimePahe response parsing and source extraction

2026-04-17 16:11:44 +00:00 · 2026-04-03 19:46:16 +05:30
parent a2a7ed8235
commit c0366dedef
1 changed files with 130 additions and 50 deletions
--- a/lib/animepahe.js
+++ b/lib/animepahe.js
@@ -11,11 +11,11 @@ const path = require('path');
 */
 class AnimePahe {
  constructor() {
-    this.base = 'https://animepahe.com';
+    this.base = process.env.ANIMEPAHE_BASE || 'https://animepahe.com';
    this.headers = {
      'User-Agent': randomUserAgent(),
      'Cookie': '__ddg1_=;__ddg2_=',
-      'Referer': 'https://animepahe.com/'
+      'Referer': `${this.base}/`
    };
  }

@@ -30,6 +30,80 @@ class AnimePahe {
    };
  }

+  /**
+   * Parse an API response that may be a JSON string or an already-decoded value.
+   * @param {string|Object} response - Raw response body or decoded payload
+   * @returns {Object|Array} Decoded payload; `{}` when the result is null or a primitive
+   * @throws {SyntaxError} When a string response is not valid JSON
+   * @private
+   */
+  _parseJsonResponse(response) {
+    const parsed = typeof response === 'string' ? JSON.parse(response) : response;
+    // Callers read properties off the result, so never hand back null or a primitive.
+    return parsed !== null && typeof parsed === 'object' ? parsed : {};
+  }
+
+  /**
+   * Pull the first array found in a response payload.
+   * @param {Object|Array} payload - Decoded response; returned as-is when already an array
+   * @param {Array<string>} keys - Property names to probe, in priority order
+   * @returns {Array} The first array-valued property, or an empty array when none match
+   * @private
+   */
+  _extractArray(payload, keys = []) {
+    // A bare array needs no unwrapping.
+    if (Array.isArray(payload)) {
+      return payload;
+    }
+
+    // Optional chaining tolerates null/undefined payloads.
+    const at = keys.findIndex((candidate) => Array.isArray(payload?.[candidate]));
+    if (at === -1) {
+      return [];
+    }
+    return payload[keys[at]];
+  }
+
+  /**
+   * Best-effort extraction of the internal anime id used by the release API.
+   * Tries og:url first, then an embedded release API link, then inline id patterns.
+   * @param {string} html - Anime detail page html
+   * @param {string} animeSession - Public anime session id, returned when nothing matches
+   * @returns {string} Internal id
+   * @private
+   */
+  _extractAnimeInternalId(html, animeSession) {
+    const $ = cheerio.load(html);
+
+    const ogUrl = $('meta[property="og:url"]').attr('content');
+    if (ogUrl) {
+      // Strip query/fragment and origin first: a '/' in the query or a bare host must not become the id.
+      const pathOnly = ogUrl.split(/[?#]/)[0].replace(/^(?:[a-z][a-z0-9+.-]*:)?\/\/[^/]*/i, '');
+      const lastPart = pathOnly.split('/').filter(Boolean).pop();
+      if (lastPart) {
+        return lastPart;
+      }
+    }
+
+    // Tried in order, most specific first; "&amp;" covers HTML-escaped attribute values.
+    const idPatterns = [
+      /\/api\?m=release&(?:amp;)?id=([^"'&\s>]+)/i,
+      /["']anime_id["']\s*[:=]\s*["']?([^"',\s<;]+)/i,
+      /["']animeId["']\s*[:=]\s*["']?([^"',\s<;]+)/i,
+      /\bid\s*[:=]\s*["']?([0-9]{1,10})["']?\s*,\s*["']?(?:title|poster|episodes?)\b/i
+    ];
+
+    for (const pattern of idPatterns) {
+      const match = html.match(pattern);
+      if (match?.[1]) {
+        return match[1];
+      }
+    }
+
+    // Fallback keeps current behavior for old layouts.
+    return animeSession;
+  }
+
  /**
   * Search for anime by query
   * @param {string} query - Search query
@@ -43,18 +117,22 @@ class AnimePahe {
        headers: this.getHeaders()
      });

-      const data = typeof response === 'string' ? JSON.parse(response) : response;
+      const data = this._parseJsonResponse(response);
+      const animeRows = this._extractArray(data, ['data', 'results', 'items', 'animes', 'list']);
      const results = [];

-      for (const anime of (data.data || [])) {
+      for (const anime of animeRows) {
+        const session = anime.session || anime.slug || anime.anime_session || null;
+        const resolutionSafeTitle = anime.title || anime.name || anime.anime_title || anime.title_en || anime.title_romaji || null;
+
        results.push({
-          id: anime.id,
-          title: anime.title,
-          url: `${this.base}/anime/${anime.session}`,
-          year: anime.year,
-          poster: anime.poster,
-          type: anime.type,
-          session: anime.session
+          id: anime.id || anime.anime_id || anime.aid || null,
+          title: resolutionSafeTitle,
+          url: anime.url || (session ? `${this.base}/anime/${session}` : null),
+          year: anime.year || anime.release_year || null,
+          poster: anime.poster || anime.poster_url || anime.image || anime.cover || null,
+          type: anime.type || anime.media_type || anime.format || null,
+          session
        });
      }

@@ -77,40 +155,35 @@ class AnimePahe {
        headers: this.getHeaders()
      });

-      // Parse HTML to extract meta tag
-      const $ = cheerio.load(html);
-      const metaTag = $('meta[property="og:url"]');
-      
-      if (!metaTag.length) {
-        throw new Error('Could not find session ID in meta tag');
-      }
-
-      const metaContent = metaTag.attr('content');
-      const tempId = metaContent.split('/').pop();
+      const tempId = this._extractAnimeInternalId(html, animeSession);

      // Fetch first page to get pagination info
-      const firstPageUrl = `${this.base}/api?m=release&id=${tempId}&sort=episode_asc&page=1`;
+      const firstPageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=1`;
      const firstPageResponse = await cloudscraper.get(firstPageUrl, {
        headers: this.getHeaders()
      });

-      const firstPageData = typeof firstPageResponse === 'string' 
-        ? JSON.parse(firstPageResponse) 
-        : firstPageResponse;
+      const firstPageData = this._parseJsonResponse(firstPageResponse);

-      let episodes = firstPageData.data || [];
-      const lastPage = firstPageData.last_page || 1;
+      let episodes = this._extractArray(firstPageData, ['data', 'results', 'items', 'episodes']);
+      const lastPage = Number(
+        firstPageData.last_page ||
+        firstPageData.lastPage ||
+        firstPageData.total_pages ||
+        firstPageData.pages ||
+        1
+      ) || 1;

      // Fetch remaining pages concurrently
      if (lastPage > 1) {
        const pagePromises = [];
        for (let page = 2; page <= lastPage; page++) {
-          const pageUrl = `${this.base}/api?m=release&id=${tempId}&sort=episode_asc&page=${page}`;
+          const pageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=${page}`;
          pagePromises.push(
            cloudscraper.get(pageUrl, { headers: this.getHeaders() })
              .then(response => {
-                const data = typeof response === 'string' ? JSON.parse(response) : response;
-                return data.data || [];
+                const data = this._parseJsonResponse(response);
+                return this._extractArray(data, ['data', 'results', 'items', 'episodes']);
              })
          );
        }
@@ -123,11 +196,11 @@ class AnimePahe {

      // Transform to Episode format
      const formattedEpisodes = episodes.map(ep => ({
-        id: ep.id,
-        number: ep.episode,
-        title: ep.title || `Episode ${ep.episode}`,
-        snapshot: ep.snapshot,
-        session: ep.session
+        id: ep.id || ep.release_id || null,
+        number: Number(ep.episode ?? ep.number ?? ep.ep ?? ep.ep_num),
+        title: ep.title || ep.episode_title || `Episode ${ep.episode ?? ep.number ?? ''}`.trim(),
+        snapshot: ep.snapshot || ep.thumbnail || ep.image || null,
+        session: ep.session || ep.release_session || null
      }));

      // Sort by episode number ascending
@@ -152,26 +225,33 @@ class AnimePahe {
        headers: this.getHeaders()
      });

-      // Extract button data attributes using regex
-      const buttonPattern = /<button[^>]+data-src="([^"]+)"[^>]+data-fansub="([^"]+)"[^>]+data-resolution="([^"]+)"[^>]+data-audio="([^"]+)"[^>]*>/g;
      const sources = [];
-      let match;
+      const $ = cheerio.load(html);

-      while ((match = buttonPattern.exec(html)) !== null) {
-        const [, src, fansub, resolution, audio] = match;
-        if (src.startsWith('https://kwik.')) {
-          sources.push({
-            url: src,
-            quality: `${resolution}p`,
-            fansub: fansub,
-            audio: audio
-          });
+      // Primary extraction: parse structured data attributes without relying on attribute order.
+      $('[data-src]').each((_, el) => {
+        const src = ($(el).attr('data-src') || '').trim();
+        if (!src || !/https?:\/\/kwik\./i.test(src)) {
+          return;
        }
-      }
+
+        const rawResolution = ($(el).attr('data-resolution') || $(el).attr('data-res') || '').trim();
+        const hasResolution = /\d/.test(rawResolution);
+        const quality = hasResolution
+          ? (rawResolution.toLowerCase().endsWith('p') ? rawResolution : `${rawResolution}p`)
+          : null;
+
+        sources.push({
+          url: src,
+          quality,
+          fansub: ($(el).attr('data-fansub') || $(el).attr('data-fansub-id') || null),
+          audio: ($(el).attr('data-audio') || $(el).attr('data-lang') || null)
+        });
+      });

      // Fallback: extract kwik links directly
      if (sources.length === 0) {
-        const kwikPattern = /https:\/\/kwik\.(si|cx|link)\/e\/\w+/g;
+        const kwikPattern = /https?:\/\/kwik\.[a-z]+\/(?:e|f|d)\/[A-Za-z0-9_-]+/gi;
        let kwikMatch;
        while ((kwikMatch = kwikPattern.exec(html)) !== null) {
          sources.push({
@@ -200,7 +280,7 @@ class AnimePahe {
      uniqueSources.sort((a, b) => {
        const getResolution = (source) => {
          if (!source.quality) return 0;
-          const match = source.quality.match(/(\d+)p/);
+          const match = source.quality.match(/(\d+)/);
          return match ? parseInt(match[1]) : 0;
        };
        return getResolution(b) - getResolution(a);