AnimepaheApi/lib/animepahe.js

const cloudscraper = require('cloudscraper');
const cheerio = require('cheerio');
const { randomUserAgent, extractM3U8FromText } = require('./utils');
const { spawn } = require('child_process');
const fs = require('fs').promises;
const os = require('os');
const path = require('path');

/**
 * AnimePahe scraper class
 */
class AnimePahe {
  constructor() {
    this.base = process.env.ANIMEPAHE_BASE || 'https://animepahe.com';
    this.headers = {
      'User-Agent': randomUserAgent(),
      'Cookie': '__ddg1_=;__ddg2_=',
      'Referer': `${this.base}/`
    };
  }

  /**
   * Get headers with a fresh user agent
   * @returns {Object} Headers object
   */
  getHeaders() {
    return {
      ...this.headers,
      'User-Agent': randomUserAgent()
    };
  }

  /**
   * Parse an API response that may be a JSON string or object
   * @param {string|Object} response - Raw response
   * @returns {Object} Parsed JSON object
   * @private
   */
  _parseJsonResponse(response) {
    if (typeof response === 'string') {
      return JSON.parse(response);
    }
    return response || {};
  }

  /**
   * Extract first matching array from a response object
   * @param {Object|Array} payload - Response payload
   * @param {Array<string>} keys - Candidate keys
   * @returns {Array} Array value or empty array
   * @private
   */
  _extractArray(payload, keys = []) {
    if (Array.isArray(payload)) {
      return payload;
    }

    for (const key of keys) {
      if (Array.isArray(payload?.[key])) {
        return payload[key];
      }
    }

    return [];
  }

  /**
   * Best-effort extraction of internal anime id used by release API
   * @param {string} html - Anime detail page html
   * @param {string} animeSession - Public anime session id
   * @returns {string} Internal id
   * @private
   */
  _extractAnimeInternalId(html, animeSession) {
    const $ = cheerio.load(html);

    const ogUrl = $('meta[property="og:url"]').attr('content');
    if (ogUrl) {
      const lastPart = ogUrl.split('/').filter(Boolean).pop();
      if (lastPart) {
        return lastPart.split('?')[0];
      }
    }

    const releaseIdMatch = html.match(/\/api\?m=release&id=([^"'&\s>]+)/i);
    if (releaseIdMatch?.[1]) {
      return releaseIdMatch[1];
    }

    const animeIdMatches = [
      /["']anime_id["']\s*[:=]\s*["']?([^"',\s<;]+)/i,
      /["']animeId["']\s*[:=]\s*["']?([^"',\s<;]+)/i,
      /\bid\s*[:=]\s*["']?([0-9]{1,10})["']?\s*,\s*["']?(?:title|poster|episodes?)\b/i
    ];

    for (const pattern of animeIdMatches) {
      const match = html.match(pattern);
      if (match?.[1]) {
        return match[1];
      }
    }

    // Fallback keeps current behavior for old layouts.
    return animeSession;
  }

  /**
   * Convert noisy upstream errors into concise API-safe messages
   * @param {string} context - Operation context (search, episodes, etc)
   * @param {Error|any} error - Raw error
   * @returns {string} Public error message
   * @private
   */
  _formatUpstreamError(context, error) {
    const rawMessage = String(error?.message || error || 'Unknown error');
    const statusMatch = rawMessage.match(/^(\d{3})\s*-\s*/);
    const statusCode = statusMatch ? parseInt(statusMatch[1], 10) : null;

    if (statusCode === 404 || /Oops\.\.\.\s*404|404\s+Not\s+Found/i.test(rawMessage)) {
      if (context === 'episodes') {
        return 'Anime session not found. Use /search first to get a valid session id.';
      }
      if (context === 'sources') {
        return 'Anime or episode session not found. Use /episodes first to get a valid episode_session.';
      }
      if (context === 'ids') {
        return 'Anime session not found. Use /search first to get a valid session id.';
      }
    }

    if (statusCode === 403 && /ddos-guard|checking your browser|cloudflare/i.test(rawMessage)) {
      return 'Upstream blocked the request (anti-bot challenge). Please retry shortly.';
    }

    let cleaned = rawMessage
      .replace(/<script[\s\S]*?<\/script>/gi, ' ')
      .replace(/<style[\s\S]*?<\/style>/gi, ' ')
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .replace(/^\d{3}\s*-\s*"?/, '')
      .replace(/"$/, '')
      .trim();

    if (!cleaned) {
      cleaned = 'Unexpected upstream error';
    }

    if (cleaned.length > 220) {
      cleaned = `${cleaned.slice(0, 220)}...`;
    }

    return cleaned;
  }

  /**
   * Search for anime by query
   * @param {string} query - Search query
   * @returns {Promise<Array>} Array of anime results
   */
  async search(query) {
    const url = `${this.base}/api?m=search&q=${encodeURIComponent(query)}`;

    try {
      const response = await cloudscraper.get(url, {
        headers: this.getHeaders()
      });

      const data = this._parseJsonResponse(response);
      const animeRows = this._extractArray(data, ['data', 'results', 'items', 'animes', 'list']);
      const results = [];

      for (const anime of animeRows) {
        const session = anime.session || anime.slug || anime.anime_session || null;
        const resolutionSafeTitle = anime.title || anime.name || anime.anime_title || anime.title_en || anime.title_romaji || null;

        results.push({
          id: anime.id || anime.anime_id || anime.aid || null,
          title: resolutionSafeTitle,
          url: anime.url || (session ? `${this.base}/anime/${session}` : null),
          year: anime.year || anime.release_year || null,
          poster: anime.poster || anime.poster_url || anime.image || anime.cover || null,
          type: anime.type || anime.media_type || anime.format || null,
          session
        });
      }

      return results;
    } catch (error) {
      throw new Error(`Search failed: ${this._formatUpstreamError('search', error)}`);
    }
  }

  /**
   * Get episodes for an anime
   * @param {string} animeSession - Anime session ID
   * @returns {Promise<Array>} Array of episodes
   */
  async getEpisodes(animeSession) {
    try {
      // Fetch anime page to get internal ID
      const animePageUrl = `${this.base}/anime/${animeSession}`;
      const html = await cloudscraper.get(animePageUrl, {
        headers: this.getHeaders()
      });

      const tempId = this._extractAnimeInternalId(html, animeSession);

      // Fetch first page to get pagination info
      const firstPageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=1`;
      const firstPageResponse = await cloudscraper.get(firstPageUrl, {
        headers: this.getHeaders()
      });

      const firstPageData = this._parseJsonResponse(firstPageResponse);

      let episodes = this._extractArray(firstPageData, ['data', 'results', 'items', 'episodes']);
      const lastPage = Number(
        firstPageData.last_page ||
        firstPageData.lastPage ||
        firstPageData.total_pages ||
        firstPageData.pages ||
        1
      ) || 1;

      // Fetch remaining pages concurrently
      if (lastPage > 1) {
        const pagePromises = [];
        for (let page = 2; page <= lastPage; page++) {
          const pageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=${page}`;
          pagePromises.push(
            cloudscraper.get(pageUrl, { headers: this.getHeaders() })
              .then(response => {
                const data = this._parseJsonResponse(response);
                return this._extractArray(data, ['data', 'results', 'items', 'episodes']);
              })
          );
        }

        const additionalPages = await Promise.all(pagePromises);
        for (const pageData of additionalPages) {
          episodes = episodes.concat(pageData);
        }
      }

      // Transform to Episode format
      const formattedEpisodes = episodes.map(ep => ({
        id: ep.id || ep.release_id || null,
        number: Number(ep.episode ?? ep.number ?? ep.ep ?? ep.ep_num),
        title: ep.title || ep.episode_title || `Episode ${ep.episode ?? ep.number ?? ''}`.trim(),
        snapshot: ep.snapshot || ep.thumbnail || ep.image || null,
        session: ep.session || ep.release_session || null
      }));

      // Sort by episode number ascending
      formattedEpisodes.sort((a, b) => a.number - b.number);

      return formattedEpisodes;
    } catch (error) {
      throw new Error(`Failed to get episodes: ${this._formatUpstreamError('episodes', error)}`);
    }
  }

  /**
   * Get streaming sources for an episode
   * @param {string} animeSession - Anime session ID
   * @param {string} episodeSession - Episode session ID
   * @returns {Promise<Array>} Array of streaming sources
   */
  async getSources(animeSession, episodeSession) {
    try {
      const playUrl = `${this.base}/play/${animeSession}/${episodeSession}`;
      const html = await cloudscraper.get(playUrl, {
        headers: this.getHeaders()
      });

      const sources = [];
      const $ = cheerio.load(html);

      // Primary extraction: parse structured data attributes without relying on attribute order.
      $('[data-src]').each((_, el) => {
        const src = ($(el).attr('data-src') || '').trim();
        if (!src || !/https?:\/\/kwik\./i.test(src)) {
          return;
        }

        const rawResolution = ($(el).attr('data-resolution') || $(el).attr('data-res') || '').trim();
        const hasResolution = /\d/.test(rawResolution);
        const quality = hasResolution
          ? (rawResolution.toLowerCase().endsWith('p') ? rawResolution : `${rawResolution}p`)
          : null;

        sources.push({
          url: src,
          quality,
          fansub: ($(el).attr('data-fansub') || $(el).attr('data-fansub-id') || null),
          audio: ($(el).attr('data-audio') || $(el).attr('data-lang') || null)
        });
      });

      // Fallback: extract kwik links directly
      if (sources.length === 0) {
        const kwikPattern = /https?:\/\/kwik\.[a-z]+\/(?:e|f|d)\/[A-Za-z0-9_-]+/gi;
        let kwikMatch;
        while ((kwikMatch = kwikPattern.exec(html)) !== null) {
          sources.push({
            url: kwikMatch[0],
            quality: null,
            fansub: null,
            audio: null
          });
        }
      }

      if (sources.length === 0) {
        throw new Error('No kwik links found on play page');
      }

      // Deduplicate sources by URL
      const uniqueSourcesMap = new Map();
      for (const source of sources) {
        if (!uniqueSourcesMap.has(source.url)) {
          uniqueSourcesMap.set(source.url, source);
        }
      }
      const uniqueSources = Array.from(uniqueSourcesMap.values());

      // Sort by resolution descending
      uniqueSources.sort((a, b) => {
        const getResolution = (source) => {
          if (!source.quality) return 0;
          const match = source.quality.match(/(\d+)/);
          return match ? parseInt(match[1]) : 0;
        };
        return getResolution(b) - getResolution(a);
      });

      return uniqueSources;
    } catch (error) {
      throw new Error(`Failed to get sources: ${this._formatUpstreamError('sources', error)}`);
    }
  }

  /**
   * Resolve Kwik URL to M3U8 streaming URL
   * @param {string} kwikUrl - Kwik page URL
   * @returns {Promise<Object>} Object with m3u8 URL and required referer headers
   */
  async resolveKwikWithNode(kwikUrl) {
    try {
      // Extract referer from kwik URL - use full embed URL as referer
      const kwikUrlObj = new URL(kwikUrl);
      const kwikReferer = kwikUrl; // Full embed URL works better than just the host
      const kwikOrigin = `${kwikUrlObj.protocol}//${kwikUrlObj.host}`;

      // Fetch Kwik page
      const html = await cloudscraper.get(kwikUrl, {
        headers: this.getHeaders(),
        timeout: 20000
      });

      // Check for direct M3U8 URL in HTML
      const directM3u8 = extractM3U8FromText(html);
      if (directM3u8) {
        return { m3u8: directM3u8, referer: kwikReferer, origin: kwikOrigin };
      }

      // Extract script blocks containing eval()
      const scriptPattern = /<script[^>]*>([\s\S]*?)<\/script>/gi;
      const scripts = [];
      let scriptMatch;

      while ((scriptMatch = scriptPattern.exec(html)) !== null) {
        scripts.push(scriptMatch[1]);
      }

      // Find the best candidate script
      let scriptBlock = null;
      let largestEvalScript = null;
      let maxLen = 0;

      for (const script of scripts) {
        if (script.includes('eval(')) {
          if (script.includes('source') || script.includes('.m3u8') || script.includes('Plyr')) {
            scriptBlock = script;
            break;
          }
          if (script.length > maxLen) {
            maxLen = script.length;
            largestEvalScript = script;
          }
        }
      }

      if (!scriptBlock) {
        scriptBlock = largestEvalScript;
      }

      if (!scriptBlock) {
        // Try data-src attribute as fallback
        const dataSrcPattern = /data-src="([^"]+\.m3u8[^"]*)"/;
        const dataSrcMatch = html.match(dataSrcPattern);
        if (dataSrcMatch) {
          return { m3u8: dataSrcMatch[1], referer: kwikReferer, origin: kwikOrigin };
        }
        throw new Error('No candidate <script> block found to evaluate');
      }

      // Transform script for Node.js execution
      let transformedScript = scriptBlock.replace(/\bdocument\b/g, 'DOC_STUB');
      transformedScript = transformedScript.replace(/^(var|const|let|j)\s*q\s*=/gm, 'window.q = ');
      transformedScript += '\ntry { console.log(window.q); } catch(e) { console.log("Variable q not found"); }';

      // Create temporary file
      const tmpDir = os.tmpdir();
      const tmpFile = path.join(tmpDir, `kwik-${Date.now()}-${Math.random().toString(36).substr(2, 9)}.js`);

      const wrapperCode = `
globalThis.window = { location: {} };
globalThis.document = { cookie: '' };
const DOC_STUB = globalThis.document;
globalThis.navigator = { userAgent: 'mozilla' };
${transformedScript}
`;

      await fs.writeFile(tmpFile, wrapperCode, 'utf8');

      // Execute with Node.js
      const nodeOutput = await this._executeNodeScript(tmpFile);

      // Clean up temp file
      try {
        await fs.unlink(tmpFile);
      } catch (e) {
        // Ignore cleanup errors
      }

      // Extract M3U8 from output
      const m3u8FromOutput = extractM3U8FromText(nodeOutput);
      if (m3u8FromOutput) {
        return { m3u8: m3u8FromOutput, referer: kwikReferer, origin: kwikOrigin };
      }

      throw new Error(`Could not resolve .m3u8. Node output (first 2000 chars):\n${nodeOutput.substring(0, 2000)}`);
    } catch (error) {
      throw new Error(`Failed to resolve Kwik URL: ${this._formatUpstreamError('m3u8', error)}`);
    }
  }

  /**
   * Get external IDs (AniList, MyAnimeList) for an anime
   * @param {string} animeSession - Anime session ID
   * @returns {Promise<Object>} Object with anilist and myanimelist IDs
   */
  async getIds(animeSession) {
    try {
      const animePageUrl = `${this.base}/anime/${animeSession}`;
      const html = await cloudscraper.get(animePageUrl, {
        headers: this.getHeaders()
      });

      const $ = cheerio.load(html);
      const anilistId = $('meta[name="anilist"]').attr('content') || null;
      const malId = $('meta[name="myanimelist"]').attr('content') || null;

      return {
        anilist: anilistId ? parseInt(anilistId, 10) : null,
        myanimelist: malId ? parseInt(malId, 10) : null
      };
    } catch (error) {
      throw new Error(`Failed to get IDs: ${this._formatUpstreamError('ids', error)}`);
    }
  }

  /**
   * Execute a Node.js script and capture output
   * @param {string} scriptPath - Path to script file
   * @returns {Promise<string>} Script output
   * @private
   */
  async _executeNodeScript(scriptPath) {
    return new Promise((resolve, reject) => {
      const nodeProcess = spawn('node', [scriptPath]);
      let stdout = '';
      let stderr = '';

      nodeProcess.stdout.on('data', (data) => {
        stdout += data.toString();
      });

      nodeProcess.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      nodeProcess.on('close', (code) => {
        const output = stdout + (stderr ? '\n[stderr]\n' + stderr : '');
        resolve(output);
      });

      nodeProcess.on('error', (error) => {
        reject(error);
      });
    });
  }
}

module.exports = AnimePahe;