const cloudscraper = require('cloudscraper'); const cheerio = require('cheerio'); const { randomUserAgent, extractM3U8FromText } = require('./utils'); const { spawn } = require('child_process'); const fs = require('fs').promises; const os = require('os'); const path = require('path'); /** * AnimePahe scraper class */ class AnimePahe { constructor() { this.base = process.env.ANIMEPAHE_BASE || 'https://animepahe.com'; this.headers = { 'User-Agent': randomUserAgent(), 'Cookie': '__ddg1_=;__ddg2_=', 'Referer': `${this.base}/` }; } /** * Get headers with a fresh user agent * @returns {Object} Headers object */ getHeaders() { return { ...this.headers, 'User-Agent': randomUserAgent() }; } /** * Parse an API response that may be a JSON string or object * @param {string|Object} response - Raw response * @returns {Object} Parsed JSON object * @private */ _parseJsonResponse(response) { if (typeof response === 'string') { return JSON.parse(response); } return response || {}; } /** * Extract first matching array from a response object * @param {Object|Array} payload - Response payload * @param {Array} keys - Candidate keys * @returns {Array} Array value or empty array * @private */ _extractArray(payload, keys = []) { if (Array.isArray(payload)) { return payload; } for (const key of keys) { if (Array.isArray(payload?.[key])) { return payload[key]; } } return []; } /** * Best-effort extraction of internal anime id used by release API * @param {string} html - Anime detail page html * @param {string} animeSession - Public anime session id * @returns {string} Internal id * @private */ _extractAnimeInternalId(html, animeSession) { const $ = cheerio.load(html); const ogUrl = $('meta[property="og:url"]').attr('content'); if (ogUrl) { const lastPart = ogUrl.split('/').filter(Boolean).pop(); if (lastPart) { return lastPart.split('?')[0]; } } const releaseIdMatch = html.match(/\/api\?m=release&id=([^"'&\s>]+)/i); if (releaseIdMatch?.[1]) { return releaseIdMatch[1]; } const animeIdMatches = [ /["']anime_id["']\s*[:=]\s*["']?([^"',\s<;]+)/i, /["']animeId["']\s*[:=]\s*["']?([^"',\s<;]+)/i, /\bid\s*[:=]\s*["']?([0-9]{1,10})["']?\s*,\s*["']?(?:title|poster|episodes?)\b/i ]; for (const pattern of animeIdMatches) { const match = html.match(pattern); if (match?.[1]) { return match[1]; } } // Fallback keeps current behavior for old layouts. return animeSession; } /** * Convert noisy upstream errors into concise API-safe messages * @param {string} context - Operation context (search, episodes, etc) * @param {Error|any} error - Raw error * @returns {string} Public error message * @private */ _formatUpstreamError(context, error) { const rawMessage = String(error?.message || error || 'Unknown error'); const statusMatch = rawMessage.match(/^(\d{3})\s*-\s*/); const statusCode = statusMatch ? parseInt(statusMatch[1], 10) : null; if (statusCode === 404 || /Oops\.\.\.\s*404|404\s+Not\s+Found/i.test(rawMessage)) { if (context === 'episodes') { return 'Anime session not found. Use /search first to get a valid session id.'; } if (context === 'sources') { return 'Anime or episode session not found. Use /episodes first to get a valid episode_session.'; } if (context === 'ids') { return 'Anime session not found. Use /search first to get a valid session id.'; } } if (statusCode === 403 && /ddos-guard|checking your browser|cloudflare/i.test(rawMessage)) { return 'Upstream blocked the request (anti-bot challenge). Please retry shortly.'; } let cleaned = rawMessage .replace(//gi, ' ') .replace(//gi, ' ') .replace(/<[^>]+>/g, ' ') .replace(/\s+/g, ' ') .replace(/^\d{3}\s*-\s*"?/, '') .replace(/"$/, '') .trim(); if (!cleaned) { cleaned = 'Unexpected upstream error'; } if (cleaned.length > 220) { cleaned = `${cleaned.slice(0, 220)}...`; } return cleaned; } /** * Search for anime by query * @param {string} query - Search query * @returns {Promise} Array of anime results */ async search(query) { const url = `${this.base}/api?m=search&q=${encodeURIComponent(query)}`; try { const response = await cloudscraper.get(url, { headers: this.getHeaders() }); const data = this._parseJsonResponse(response); const animeRows = this._extractArray(data, ['data', 'results', 'items', 'animes', 'list']); const results = []; for (const anime of animeRows) { const session = anime.session || anime.slug || anime.anime_session || null; const resolutionSafeTitle = anime.title || anime.name || anime.anime_title || anime.title_en || anime.title_romaji || null; results.push({ id: anime.id || anime.anime_id || anime.aid || null, title: resolutionSafeTitle, url: anime.url || (session ? `${this.base}/anime/${session}` : null), year: anime.year || anime.release_year || null, poster: anime.poster || anime.poster_url || anime.image || anime.cover || null, type: anime.type || anime.media_type || anime.format || null, session }); } return results; } catch (error) { throw new Error(`Search failed: ${this._formatUpstreamError('search', error)}`); } } /** * Get episodes for an anime * @param {string} animeSession - Anime session ID * @returns {Promise} Array of episodes */ async getEpisodes(animeSession) { try { // Fetch anime page to get internal ID const animePageUrl = `${this.base}/anime/${animeSession}`; const html = await cloudscraper.get(animePageUrl, { headers: this.getHeaders() }); const tempId = this._extractAnimeInternalId(html, animeSession); // Fetch first page to get pagination info const firstPageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=1`; const firstPageResponse = await cloudscraper.get(firstPageUrl, { headers: this.getHeaders() }); const firstPageData = this._parseJsonResponse(firstPageResponse); let episodes = this._extractArray(firstPageData, ['data', 'results', 'items', 'episodes']); const lastPage = Number( firstPageData.last_page || firstPageData.lastPage || firstPageData.total_pages || firstPageData.pages || 1 ) || 1; // Fetch remaining pages concurrently if (lastPage > 1) { const pagePromises = []; for (let page = 2; page <= lastPage; page++) { const pageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=${page}`; pagePromises.push( cloudscraper.get(pageUrl, { headers: this.getHeaders() }) .then(response => { const data = this._parseJsonResponse(response); return this._extractArray(data, ['data', 'results', 'items', 'episodes']); }) ); } const additionalPages = await Promise.all(pagePromises); for (const pageData of additionalPages) { episodes = episodes.concat(pageData); } } // Transform to Episode format const formattedEpisodes = episodes.map(ep => ({ id: ep.id || ep.release_id || null, number: Number(ep.episode ?? ep.number ?? ep.ep ?? ep.ep_num), title: ep.title || ep.episode_title || `Episode ${ep.episode ?? ep.number ?? ''}`.trim(), snapshot: ep.snapshot || ep.thumbnail || ep.image || null, session: ep.session || ep.release_session || null })); // Sort by episode number ascending formattedEpisodes.sort((a, b) => a.number - b.number); return formattedEpisodes; } catch (error) { throw new Error(`Failed to get episodes: ${this._formatUpstreamError('episodes', error)}`); } } /** * Get streaming sources for an episode * @param {string} animeSession - Anime session ID * @param {string} episodeSession - Episode session ID * @returns {Promise} Array of streaming sources */ async getSources(animeSession, episodeSession) { try { const playUrl = `${this.base}/play/${animeSession}/${episodeSession}`; const html = await cloudscraper.get(playUrl, { headers: this.getHeaders() }); const sources = []; const $ = cheerio.load(html); // Primary extraction: parse structured data attributes without relying on attribute order. $('[data-src]').each((_, el) => { const src = ($(el).attr('data-src') || '').trim(); if (!src || !/https?:\/\/kwik\./i.test(src)) { return; } const rawResolution = ($(el).attr('data-resolution') || $(el).attr('data-res') || '').trim(); const hasResolution = /\d/.test(rawResolution); const quality = hasResolution ? (rawResolution.toLowerCase().endsWith('p') ? rawResolution : `${rawResolution}p`) : null; sources.push({ url: src, quality, fansub: ($(el).attr('data-fansub') || $(el).attr('data-fansub-id') || null), audio: ($(el).attr('data-audio') || $(el).attr('data-lang') || null) }); }); // Fallback: extract kwik links directly if (sources.length === 0) { const kwikPattern = /https?:\/\/kwik\.[a-z]+\/(?:e|f|d)\/[A-Za-z0-9_-]+/gi; let kwikMatch; while ((kwikMatch = kwikPattern.exec(html)) !== null) { sources.push({ url: kwikMatch[0], quality: null, fansub: null, audio: null }); } } if (sources.length === 0) { throw new Error('No kwik links found on play page'); } // Deduplicate sources by URL const uniqueSourcesMap = new Map(); for (const source of sources) { if (!uniqueSourcesMap.has(source.url)) { uniqueSourcesMap.set(source.url, source); } } const uniqueSources = Array.from(uniqueSourcesMap.values()); // Sort by resolution descending uniqueSources.sort((a, b) => { const getResolution = (source) => { if (!source.quality) return 0; const match = source.quality.match(/(\d+)/); return match ? parseInt(match[1]) : 0; }; return getResolution(b) - getResolution(a); }); return uniqueSources; } catch (error) { throw new Error(`Failed to get sources: ${this._formatUpstreamError('sources', error)}`); } } /** * Resolve Kwik URL to M3U8 streaming URL * @param {string} kwikUrl - Kwik page URL * @returns {Promise} Object with m3u8 URL and required referer headers */ async resolveKwikWithNode(kwikUrl) { try { // Extract referer from kwik URL - use full embed URL as referer const kwikUrlObj = new URL(kwikUrl); const kwikReferer = kwikUrl; // Full embed URL works better than just the host const kwikOrigin = `${kwikUrlObj.protocol}//${kwikUrlObj.host}`; // Fetch Kwik page const html = await cloudscraper.get(kwikUrl, { headers: this.getHeaders(), timeout: 20000 }); // Check for direct M3U8 URL in HTML const directM3u8 = extractM3U8FromText(html); if (directM3u8) { return { m3u8: directM3u8, referer: kwikReferer, origin: kwikOrigin }; } // Extract script blocks containing eval() const scriptPattern = /]*>([\s\S]*?)<\/script>/gi; const scripts = []; let scriptMatch; while ((scriptMatch = scriptPattern.exec(html)) !== null) { scripts.push(scriptMatch[1]); } // Find the best candidate script let scriptBlock = null; let largestEvalScript = null; let maxLen = 0; for (const script of scripts) { if (script.includes('eval(')) { if (script.includes('source') || script.includes('.m3u8') || script.includes('Plyr')) { scriptBlock = script; break; } if (script.length > maxLen) { maxLen = script.length; largestEvalScript = script; } } } if (!scriptBlock) { scriptBlock = largestEvalScript; } if (!scriptBlock) { // Try data-src attribute as fallback const dataSrcPattern = /data-src="([^"]+\.m3u8[^"]*)"/; const dataSrcMatch = html.match(dataSrcPattern); if (dataSrcMatch) { return { m3u8: dataSrcMatch[1], referer: kwikReferer, origin: kwikOrigin }; } throw new Error('No candidate