Harden AnimePahe response parsing and source extraction

This commit is contained in:
Md Tahseen Hussain
2026-04-03 19:46:16 +05:30
parent a2a7ed8235
commit c0366dedef

View File

@@ -11,11 +11,11 @@ const path = require('path');
*/
class AnimePahe {
/**
 * Set up the site base URL and the default request headers.
 *
 * NOTE(review): this span assigns `this.base` twice and lists `Referer` twice
 * with no separating comma; it looks like a rendered diff in which the removed
 * and the added line of each pair are shown back to back. Confirm against the
 * real file before relying on either variant.
 */
constructor() {
// Fixed site origin.
this.base = 'https://animepahe.com';
// Site origin taken from the ANIMEPAHE_BASE environment variable when it is set
// to a non-empty value, otherwise the same fixed origin as above.
this.base = process.env.ANIMEPAHE_BASE || 'https://animepahe.com';
// Default headers: a User-Agent obtained from randomUserAgent(), a Cookie header
// carrying empty `__ddg1_` / `__ddg2_` values, and a Referer for the site root.
this.headers = {
'User-Agent': randomUserAgent(),
'Cookie': '__ddg1_=;__ddg2_=',
// Referer pinned to the fixed origin.
'Referer': 'https://animepahe.com/'
// Referer derived from `this.base`, so it follows the configured origin.
'Referer': `${this.base}/`
};
}
@@ -30,6 +30,80 @@ class AnimePahe {
};
}
/**
* Parse an API response that may be a JSON string or object
* @param {string|Object} response - Raw response
* @returns {Object} Parsed JSON object
* @private
*/
_parseJsonResponse(response) {
if (typeof response === 'string') {
return JSON.parse(response);
}
return response || {};
}
/**
* Extract first matching array from a response object
* @param {Object|Array} payload - Response payload
* @param {Array<string>} keys - Candidate keys
* @returns {Array} Array value or empty array
* @private
*/
_extractArray(payload, keys = []) {
if (Array.isArray(payload)) {
return payload;
}
for (const key of keys) {
if (Array.isArray(payload?.[key])) {
return payload[key];
}
}
return [];
}
/**
* Best-effort extraction of internal anime id used by release API
* @param {string} html - Anime detail page html
* @param {string} animeSession - Public anime session id
* @returns {string} Internal id
* @private
*/
_extractAnimeInternalId(html, animeSession) {
const $ = cheerio.load(html);
const ogUrl = $('meta[property="og:url"]').attr('content');
if (ogUrl) {
const lastPart = ogUrl.split('/').filter(Boolean).pop();
if (lastPart) {
return lastPart.split('?')[0];
}
}
const releaseIdMatch = html.match(/\/api\?m=release&id=([^"'&\s>]+)/i);
if (releaseIdMatch?.[1]) {
return releaseIdMatch[1];
}
const animeIdMatches = [
/["']anime_id["']\s*[:=]\s*["']?([^"',\s<;]+)/i,
/["']animeId["']\s*[:=]\s*["']?([^"',\s<;]+)/i,
/\bid\s*[:=]\s*["']?([0-9]{1,10})["']?\s*,\s*["']?(?:title|poster|episodes?)\b/i
];
for (const pattern of animeIdMatches) {
const match = html.match(pattern);
if (match?.[1]) {
return match[1];
}
}
// Fallback keeps current behavior for old layouts.
return animeSession;
}
/**
* Search for anime by query
* @param {string} query - Search query
@@ -43,18 +117,22 @@ class AnimePahe {
headers: this.getHeaders()
});
const data = typeof response === 'string' ? JSON.parse(response) : response;
const data = this._parseJsonResponse(response);
const animeRows = this._extractArray(data, ['data', 'results', 'items', 'animes', 'list']);
const results = [];
for (const anime of (data.data || [])) {
for (const anime of animeRows) {
const session = anime.session || anime.slug || anime.anime_session || null;
const resolutionSafeTitle = anime.title || anime.name || anime.anime_title || anime.title_en || anime.title_romaji || null;
results.push({
id: anime.id,
title: anime.title,
url: `${this.base}/anime/${anime.session}`,
year: anime.year,
poster: anime.poster,
type: anime.type,
session: anime.session
id: anime.id || anime.anime_id || anime.aid || null,
title: resolutionSafeTitle,
url: anime.url || (session ? `${this.base}/anime/${session}` : null),
year: anime.year || anime.release_year || null,
poster: anime.poster || anime.poster_url || anime.image || anime.cover || null,
type: anime.type || anime.media_type || anime.format || null,
session
});
}
@@ -77,40 +155,35 @@ class AnimePahe {
headers: this.getHeaders()
});
// Parse HTML to extract meta tag
const $ = cheerio.load(html);
const metaTag = $('meta[property="og:url"]');
if (!metaTag.length) {
throw new Error('Could not find session ID in meta tag');
}
const metaContent = metaTag.attr('content');
const tempId = metaContent.split('/').pop();
const tempId = this._extractAnimeInternalId(html, animeSession);
// Fetch first page to get pagination info
const firstPageUrl = `${this.base}/api?m=release&id=${tempId}&sort=episode_asc&page=1`;
const firstPageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=1`;
const firstPageResponse = await cloudscraper.get(firstPageUrl, {
headers: this.getHeaders()
});
const firstPageData = typeof firstPageResponse === 'string'
? JSON.parse(firstPageResponse)
: firstPageResponse;
const firstPageData = this._parseJsonResponse(firstPageResponse);
let episodes = firstPageData.data || [];
const lastPage = firstPageData.last_page || 1;
let episodes = this._extractArray(firstPageData, ['data', 'results', 'items', 'episodes']);
const lastPage = Number(
firstPageData.last_page ||
firstPageData.lastPage ||
firstPageData.total_pages ||
firstPageData.pages ||
1
) || 1;
// Fetch remaining pages concurrently
if (lastPage > 1) {
const pagePromises = [];
for (let page = 2; page <= lastPage; page++) {
const pageUrl = `${this.base}/api?m=release&id=${tempId}&sort=episode_asc&page=${page}`;
const pageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=${page}`;
pagePromises.push(
cloudscraper.get(pageUrl, { headers: this.getHeaders() })
.then(response => {
const data = typeof response === 'string' ? JSON.parse(response) : response;
return data.data || [];
const data = this._parseJsonResponse(response);
return this._extractArray(data, ['data', 'results', 'items', 'episodes']);
})
);
}
@@ -123,11 +196,11 @@ class AnimePahe {
// Transform to Episode format
const formattedEpisodes = episodes.map(ep => ({
id: ep.id,
number: ep.episode,
title: ep.title || `Episode ${ep.episode}`,
snapshot: ep.snapshot,
session: ep.session
id: ep.id || ep.release_id || null,
number: Number(ep.episode ?? ep.number ?? ep.ep ?? ep.ep_num),
title: ep.title || ep.episode_title || `Episode ${ep.episode ?? ep.number ?? ''}`.trim(),
snapshot: ep.snapshot || ep.thumbnail || ep.image || null,
session: ep.session || ep.release_session || null
}));
// Sort by episode number ascending
@@ -152,26 +225,33 @@ class AnimePahe {
headers: this.getHeaders()
});
// Extract button data attributes using regex
const buttonPattern = /<button[^>]+data-src="([^"]+)"[^>]+data-fansub="([^"]+)"[^>]+data-resolution="([^"]+)"[^>]+data-audio="([^"]+)"[^>]*>/g;
const sources = [];
let match;
const $ = cheerio.load(html);
while ((match = buttonPattern.exec(html)) !== null) {
const [, src, fansub, resolution, audio] = match;
if (src.startsWith('https://kwik.')) {
sources.push({
url: src,
quality: `${resolution}p`,
fansub: fansub,
audio: audio
});
// Primary extraction: parse structured data attributes without relying on attribute order.
$('[data-src]').each((_, el) => {
const src = ($(el).attr('data-src') || '').trim();
if (!src || !/https?:\/\/kwik\./i.test(src)) {
return;
}
}
const rawResolution = ($(el).attr('data-resolution') || $(el).attr('data-res') || '').trim();
const hasResolution = /\d/.test(rawResolution);
const quality = hasResolution
? (rawResolution.toLowerCase().endsWith('p') ? rawResolution : `${rawResolution}p`)
: null;
sources.push({
url: src,
quality,
fansub: ($(el).attr('data-fansub') || $(el).attr('data-fansub-id') || null),
audio: ($(el).attr('data-audio') || $(el).attr('data-lang') || null)
});
});
// Fallback: extract kwik links directly
if (sources.length === 0) {
const kwikPattern = /https:\/\/kwik\.(si|cx|link)\/e\/\w+/g;
const kwikPattern = /https?:\/\/kwik\.[a-z]+\/(?:e|f|d)\/[A-Za-z0-9_-]+/gi;
let kwikMatch;
while ((kwikMatch = kwikPattern.exec(html)) !== null) {
sources.push({
@@ -200,7 +280,7 @@ class AnimePahe {
uniqueSources.sort((a, b) => {
const getResolution = (source) => {
if (!source.quality) return 0;
const match = source.quality.match(/(\d+)p/);
const match = source.quality.match(/(\d+)/);
return match ? parseInt(match[1]) : 0;
};
return getResolution(b) - getResolution(a);