mirror of
https://github.com/mdtahseen7/AnimepaheApi.git
synced 2026-04-17 16:11:44 +00:00
Harden AnimePahe response parsing and source extraction
This commit is contained in:
180
lib/animepahe.js
180
lib/animepahe.js
@@ -11,11 +11,11 @@ const path = require('path');
|
||||
*/
|
||||
class AnimePahe {
|
||||
constructor() {
|
||||
this.base = 'https://animepahe.com';
|
||||
this.base = process.env.ANIMEPAHE_BASE || 'https://animepahe.com';
|
||||
this.headers = {
|
||||
'User-Agent': randomUserAgent(),
|
||||
'Cookie': '__ddg1_=;__ddg2_=',
|
||||
'Referer': 'https://animepahe.com/'
|
||||
'Referer': `${this.base}/`
|
||||
};
|
||||
}
|
||||
|
||||
@@ -30,6 +30,80 @@ class AnimePahe {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse an API response that may be a JSON string or object
|
||||
* @param {string|Object} response - Raw response
|
||||
* @returns {Object} Parsed JSON object
|
||||
* @private
|
||||
*/
|
||||
_parseJsonResponse(response) {
|
||||
if (typeof response === 'string') {
|
||||
return JSON.parse(response);
|
||||
}
|
||||
return response || {};
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract first matching array from a response object
|
||||
* @param {Object|Array} payload - Response payload
|
||||
* @param {Array<string>} keys - Candidate keys
|
||||
* @returns {Array} Array value or empty array
|
||||
* @private
|
||||
*/
|
||||
_extractArray(payload, keys = []) {
|
||||
if (Array.isArray(payload)) {
|
||||
return payload;
|
||||
}
|
||||
|
||||
for (const key of keys) {
|
||||
if (Array.isArray(payload?.[key])) {
|
||||
return payload[key];
|
||||
}
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Best-effort extraction of internal anime id used by release API
|
||||
* @param {string} html - Anime detail page html
|
||||
* @param {string} animeSession - Public anime session id
|
||||
* @returns {string} Internal id
|
||||
* @private
|
||||
*/
|
||||
_extractAnimeInternalId(html, animeSession) {
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
const ogUrl = $('meta[property="og:url"]').attr('content');
|
||||
if (ogUrl) {
|
||||
const lastPart = ogUrl.split('/').filter(Boolean).pop();
|
||||
if (lastPart) {
|
||||
return lastPart.split('?')[0];
|
||||
}
|
||||
}
|
||||
|
||||
const releaseIdMatch = html.match(/\/api\?m=release&id=([^"'&\s>]+)/i);
|
||||
if (releaseIdMatch?.[1]) {
|
||||
return releaseIdMatch[1];
|
||||
}
|
||||
|
||||
const animeIdMatches = [
|
||||
/["']anime_id["']\s*[:=]\s*["']?([^"',\s<;]+)/i,
|
||||
/["']animeId["']\s*[:=]\s*["']?([^"',\s<;]+)/i,
|
||||
/\bid\s*[:=]\s*["']?([0-9]{1,10})["']?\s*,\s*["']?(?:title|poster|episodes?)\b/i
|
||||
];
|
||||
|
||||
for (const pattern of animeIdMatches) {
|
||||
const match = html.match(pattern);
|
||||
if (match?.[1]) {
|
||||
return match[1];
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback keeps current behavior for old layouts.
|
||||
return animeSession;
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for anime by query
|
||||
* @param {string} query - Search query
|
||||
@@ -43,18 +117,22 @@ class AnimePahe {
|
||||
headers: this.getHeaders()
|
||||
});
|
||||
|
||||
const data = typeof response === 'string' ? JSON.parse(response) : response;
|
||||
const data = this._parseJsonResponse(response);
|
||||
const animeRows = this._extractArray(data, ['data', 'results', 'items', 'animes', 'list']);
|
||||
const results = [];
|
||||
|
||||
for (const anime of (data.data || [])) {
|
||||
for (const anime of animeRows) {
|
||||
const session = anime.session || anime.slug || anime.anime_session || null;
|
||||
const resolutionSafeTitle = anime.title || anime.name || anime.anime_title || anime.title_en || anime.title_romaji || null;
|
||||
|
||||
results.push({
|
||||
id: anime.id,
|
||||
title: anime.title,
|
||||
url: `${this.base}/anime/${anime.session}`,
|
||||
year: anime.year,
|
||||
poster: anime.poster,
|
||||
type: anime.type,
|
||||
session: anime.session
|
||||
id: anime.id || anime.anime_id || anime.aid || null,
|
||||
title: resolutionSafeTitle,
|
||||
url: anime.url || (session ? `${this.base}/anime/${session}` : null),
|
||||
year: anime.year || anime.release_year || null,
|
||||
poster: anime.poster || anime.poster_url || anime.image || anime.cover || null,
|
||||
type: anime.type || anime.media_type || anime.format || null,
|
||||
session
|
||||
});
|
||||
}
|
||||
|
||||
@@ -77,40 +155,35 @@ class AnimePahe {
|
||||
headers: this.getHeaders()
|
||||
});
|
||||
|
||||
// Parse HTML to extract meta tag
|
||||
const $ = cheerio.load(html);
|
||||
const metaTag = $('meta[property="og:url"]');
|
||||
|
||||
if (!metaTag.length) {
|
||||
throw new Error('Could not find session ID in meta tag');
|
||||
}
|
||||
|
||||
const metaContent = metaTag.attr('content');
|
||||
const tempId = metaContent.split('/').pop();
|
||||
const tempId = this._extractAnimeInternalId(html, animeSession);
|
||||
|
||||
// Fetch first page to get pagination info
|
||||
const firstPageUrl = `${this.base}/api?m=release&id=${tempId}&sort=episode_asc&page=1`;
|
||||
const firstPageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=1`;
|
||||
const firstPageResponse = await cloudscraper.get(firstPageUrl, {
|
||||
headers: this.getHeaders()
|
||||
});
|
||||
|
||||
const firstPageData = typeof firstPageResponse === 'string'
|
||||
? JSON.parse(firstPageResponse)
|
||||
: firstPageResponse;
|
||||
const firstPageData = this._parseJsonResponse(firstPageResponse);
|
||||
|
||||
let episodes = firstPageData.data || [];
|
||||
const lastPage = firstPageData.last_page || 1;
|
||||
let episodes = this._extractArray(firstPageData, ['data', 'results', 'items', 'episodes']);
|
||||
const lastPage = Number(
|
||||
firstPageData.last_page ||
|
||||
firstPageData.lastPage ||
|
||||
firstPageData.total_pages ||
|
||||
firstPageData.pages ||
|
||||
1
|
||||
) || 1;
|
||||
|
||||
// Fetch remaining pages concurrently
|
||||
if (lastPage > 1) {
|
||||
const pagePromises = [];
|
||||
for (let page = 2; page <= lastPage; page++) {
|
||||
const pageUrl = `${this.base}/api?m=release&id=${tempId}&sort=episode_asc&page=${page}`;
|
||||
const pageUrl = `${this.base}/api?m=release&id=${encodeURIComponent(tempId)}&sort=episode_asc&page=${page}`;
|
||||
pagePromises.push(
|
||||
cloudscraper.get(pageUrl, { headers: this.getHeaders() })
|
||||
.then(response => {
|
||||
const data = typeof response === 'string' ? JSON.parse(response) : response;
|
||||
return data.data || [];
|
||||
const data = this._parseJsonResponse(response);
|
||||
return this._extractArray(data, ['data', 'results', 'items', 'episodes']);
|
||||
})
|
||||
);
|
||||
}
|
||||
@@ -123,11 +196,11 @@ class AnimePahe {
|
||||
|
||||
// Transform to Episode format
|
||||
const formattedEpisodes = episodes.map(ep => ({
|
||||
id: ep.id,
|
||||
number: ep.episode,
|
||||
title: ep.title || `Episode ${ep.episode}`,
|
||||
snapshot: ep.snapshot,
|
||||
session: ep.session
|
||||
id: ep.id || ep.release_id || null,
|
||||
number: Number(ep.episode ?? ep.number ?? ep.ep ?? ep.ep_num),
|
||||
title: ep.title || ep.episode_title || `Episode ${ep.episode ?? ep.number ?? ''}`.trim(),
|
||||
snapshot: ep.snapshot || ep.thumbnail || ep.image || null,
|
||||
session: ep.session || ep.release_session || null
|
||||
}));
|
||||
|
||||
// Sort by episode number ascending
|
||||
@@ -152,26 +225,33 @@ class AnimePahe {
|
||||
headers: this.getHeaders()
|
||||
});
|
||||
|
||||
// Extract button data attributes using regex
|
||||
const buttonPattern = /<button[^>]+data-src="([^"]+)"[^>]+data-fansub="([^"]+)"[^>]+data-resolution="([^"]+)"[^>]+data-audio="([^"]+)"[^>]*>/g;
|
||||
const sources = [];
|
||||
let match;
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
while ((match = buttonPattern.exec(html)) !== null) {
|
||||
const [, src, fansub, resolution, audio] = match;
|
||||
if (src.startsWith('https://kwik.')) {
|
||||
sources.push({
|
||||
url: src,
|
||||
quality: `${resolution}p`,
|
||||
fansub: fansub,
|
||||
audio: audio
|
||||
});
|
||||
// Primary extraction: parse structured data attributes without relying on attribute order.
|
||||
$('[data-src]').each((_, el) => {
|
||||
const src = ($(el).attr('data-src') || '').trim();
|
||||
if (!src || !/https?:\/\/kwik\./i.test(src)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const rawResolution = ($(el).attr('data-resolution') || $(el).attr('data-res') || '').trim();
|
||||
const hasResolution = /\d/.test(rawResolution);
|
||||
const quality = hasResolution
|
||||
? (rawResolution.toLowerCase().endsWith('p') ? rawResolution : `${rawResolution}p`)
|
||||
: null;
|
||||
|
||||
sources.push({
|
||||
url: src,
|
||||
quality,
|
||||
fansub: ($(el).attr('data-fansub') || $(el).attr('data-fansub-id') || null),
|
||||
audio: ($(el).attr('data-audio') || $(el).attr('data-lang') || null)
|
||||
});
|
||||
});
|
||||
|
||||
// Fallback: extract kwik links directly
|
||||
if (sources.length === 0) {
|
||||
const kwikPattern = /https:\/\/kwik\.(si|cx|link)\/e\/\w+/g;
|
||||
const kwikPattern = /https?:\/\/kwik\.[a-z]+\/(?:e|f|d)\/[A-Za-z0-9_-]+/gi;
|
||||
let kwikMatch;
|
||||
while ((kwikMatch = kwikPattern.exec(html)) !== null) {
|
||||
sources.push({
|
||||
@@ -200,7 +280,7 @@ class AnimePahe {
|
||||
uniqueSources.sort((a, b) => {
|
||||
const getResolution = (source) => {
|
||||
if (!source.quality) return 0;
|
||||
const match = source.quality.match(/(\d+)p/);
|
||||
const match = source.quality.match(/(\d+)/);
|
||||
return match ? parseInt(match[1]) : 0;
|
||||
};
|
||||
return getResolution(b) - getResolution(a);
|
||||
|
||||
Reference in New Issue
Block a user