From bbcff523f6390ee08a4e5363a3346d5af857d4b3 Mon Sep 17 00:00:00 2001
From: himanshu8443
Date: Fri, 18 Apr 2025 19:02:57 +0530
Subject: [PATCH] Add URL checker script and update workflow for provider URL validation

---
 .github/scripts/url-checker.js   | 129 +++++++++++++++++++++++++++++++
 .github/workflows/check-urls.yml |  32 ++++++++
 .github/workflows/main.yml       | 124 -----------------------------
 3 files changed, 161 insertions(+), 124 deletions(-)
 create mode 100644 .github/scripts/url-checker.js
 create mode 100644 .github/workflows/check-urls.yml
 delete mode 100644 .github/workflows/main.yml

diff --git a/.github/scripts/url-checker.js b/.github/scripts/url-checker.js
new file mode 100644
index 0000000..47852b9
--- /dev/null
+++ b/.github/scripts/url-checker.js
@@ -0,0 +1,129 @@
+const fs = require("fs");
+const axios = require("axios");
+const path = require("path");
+
+const FILE_PATH = "modflix.json";
+
+// Read the modflix.json file
+async function readModflixJson() {
+  try {
+    const data = fs.readFileSync(FILE_PATH, "utf8");
+    return JSON.parse(data);
+  } catch (error) {
+    console.error(`Error reading ${FILE_PATH}:`, error);
+    process.exit(1);
+  }
+}
+
+// Extract domain from URL
+function getDomain(url) {
+  try {
+    const urlObj = new URL(url);
+    return urlObj.origin;
+  } catch (error) {
+    console.error(`Error parsing URL ${url}:`, error);
+    return url;
+  }
+}
+
+// Extract path from URL
+function getPath(url) {
+  try {
+    const urlObj = new URL(url);
+    return urlObj.pathname + urlObj.search + urlObj.hash;
+  } catch (error) {
+    console.error(`Error extracting path from ${url}:`, error);
+    return "";
+  }
+}
+
+// Check URL and return new URL if redirected
+async function checkUrl(url) {
+  try {
+    const response = await axios.get(url, {
+      maxRedirects: 0,
+      validateStatus: (status) => status >= 200 && status < 300,
+    });
+
+    // If status is 200, no change needed
+    if (response.status === 200) {
+      console.log(`✅ ${url} is valid (200 OK)`);
+      return null;
+    }
+  } catch (error) {
+    // Handle redirects
+    if (
+      error.response &&
+      (error.response.status === 301 ||
+        error.response.status === 302 ||
+        error.response.status === 307 ||
+        error.response.status === 308)
+    ) {
+      const newLocation = error.response.headers.location;
+      if (newLocation) {
+        // If it's a relative redirect, construct the full URL
+        let fullRedirectUrl = newLocation;
+        if (!newLocation.startsWith("http")) {
+          const baseUrl = new URL(url);
+          fullRedirectUrl = new URL(newLocation, baseUrl.origin).toString();
+        }
+
+        console.log(`🔄 ${url} redirects to ${fullRedirectUrl}`);
+
+        // Get new domain but keep original path
+        const newDomain = getDomain(fullRedirectUrl);
+        const originalPath = getPath(url);
+
+        // Construct new URL with original path
+        let finalUrl = newDomain;
+        if (originalPath && originalPath !== "/") {
+          finalUrl += originalPath;
+        }
+
+        return finalUrl;
+      }
+    } else if (error.response) {
+      console.log(`⚠️ ${url} returned status ${error.response.status}`);
+    } else if (error.request) {
+      console.log(`❌ ${url} failed to respond`);
+    } else {
+      console.log(`❌ Error checking ${url}: ${error.message}`);
+    }
+  }
+
+  // Return null if no change or error
+  return null;
+}
+
+// Main function
+async function main() {
+  const providers = await readModflixJson();
+  let hasChanges = false;
+
+  // Process each provider
+  for (const [key, provider] of Object.entries(providers)) {
+    const url = provider.url;
+    console.log(`Checking ${provider.name} (${url})...`);
+
+    const newUrl = await checkUrl(url);
+    if (newUrl && newUrl !== url) {
+      provider.url = newUrl;
+      hasChanges = true;
+      console.log(`Updated ${provider.name} URL to ${newUrl}`);
+    }
+  }
+
+  // Write changes back to file if needed
+  if (hasChanges) {
+    fs.writeFileSync(FILE_PATH, JSON.stringify(providers, null, 2));
+    console.log(`Updated ${FILE_PATH} with new URLs`);
+  } else {
+    console.log(`No changes needed for ${FILE_PATH}`);
+  }
+}
+
+// Execute main function
+main().catch((error) => {
+  console.error("Unhandled error:", error);
+  process.exit(1);
+});

diff --git a/.github/workflows/check-urls.yml b/.github/workflows/check-urls.yml
new file mode 100644
index 0000000..08015da
--- /dev/null
+++ b/.github/workflows/check-urls.yml
@@ -0,0 +1,35 @@
+name: Check Provider URLs
+
+permissions:
+  contents: write # Needed to commit updated modflix.json back to the repo
+
+on:
+  schedule:
+    - cron: "0 0 * * *" # Run daily at midnight UTC
+  workflow_dispatch: # Allow manual triggering
+
+jobs:
+  check-urls:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: "16"
+
+      - name: Install dependencies
+        run: npm install axios
+
+      - name: Run URL checker
+        run: node .github/scripts/url-checker.js
+
+      - name: Commit changes if any
+        run: |
+          git config --global user.name 'GitHub Action'
+          git config --global user.email 'action@github.com'
+          git add modflix.json
+          git diff --cached --quiet || (git commit -m "Update provider URLs [skip ci]" && git push)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
deleted file mode 100644
index e9042a0..0000000
--- a/.github/workflows/main.yml
+++ /dev/null
@@ -1,124 +0,0 @@
-name: Check Website URLs
-
-on:
-  schedule:
-    # Runs daily at 03:00 UTC
-    - cron: '0 3 * * *'
-  workflow_dispatch: # Allows manual triggering
-
-permissions:
-  contents: write # Needed to commit changes back to the repo
-
-jobs:
-  check-urls:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          # Fetch full history to be able to compare changes if needed (optional)
-          fetch-depth: 0
-
-      - name: Install jq
-        run: sudo apt-get update && sudo apt-get install -y jq
-
-      - name: Check URLs and update modflix.json
-        id: check_urls
-        run: |
-          echo "Starting URL check process..."
-          cp modflix.json modflix.json.bak # Backup original file
-          updated_json=$(cat modflix.json)
-          changes_made=false
-
-          # Iterate over each key in the JSON object
-          for key in $(echo "$updated_json" | jq -r 'keys_unsorted[]'); do
-            original_url=$(echo "$updated_json" | jq -r --arg k "$key" '.[$k].url')
-            echo "Checking key: $key, URL: $original_url"
-
-            # Use curl to follow redirects (-L), get final URL (-w '%{url_effective}'),
-            # suppress output (-s), discard body (-o /dev/null), get status code (-w '%{http_code}')
-            # Set timeouts to prevent hanging
-            # IMPORTANT: Capture exit code separately to handle failures without stopping the script
-            response=$(curl -Lso /dev/null -w "%{http_code} %{url_effective}" --max-time 15 --connect-timeout 7 "$original_url")
-            curl_exit_code=$? # Capture curl's exit code
-
-            # === Failure Handling ===
-            # Check if curl itself failed (e.g., DNS error, connection timeout)
-            if [ $curl_exit_code -ne 0 ]; then
-              echo " -> curl command failed with exit code $curl_exit_code for URL: $original_url. Skipping."
-              continue # Skip to the next URL in the loop
-            fi
-
-            # === Success Handling (curl exit code was 0) ===
-            # Parse the response only if curl succeeded
-            http_code=$(echo "$response" | awk '{print $1}')
-            final_url=$(echo "$response" | awk '{print $2}')
-
-            echo " -> Status: $http_code, Final URL: $final_url"
-
-            # Check if it was a redirect (3xx) and the final URL is different
-            if [[ "$http_code" =~ ^3[0-9]{2}$ ]] && [[ "$original_url" != "$final_url" ]]; then
-              echo " -> Redirect detected. Original: $original_url, New Effective: $final_url"
-
-              # Extract the scheme and host from the final URL
-              new_scheme_host=$(echo "$final_url" | sed -E 's|^(https?://[^/]+).*|\1|')
-
-              # Extract the path, query, and fragment from the original URL
-              original_path_etc=$(echo "$original_url" | sed -E 's|https?://[^/]+(.*)|\1|')
-              if [[ -z "$original_path_etc" ]]; then
-                if [[ "$original_url" == */ ]]; then
-                  original_path_etc="/"
-                fi
-              fi
-
-              # Construct the new URL using the new scheme/host and original path/query/fragment
-              new_url="${new_scheme_host}${original_path_etc}"
-              echo " -> Constructing new URL: $new_url"
-
-              # Update the JSON content in the variable
-              updated_json=$(echo "$updated_json" | jq --arg k "$key" --arg nu "$new_url" '.[$k].url = $nu')
-              changes_made=true
-              echo " -> Updated JSON for key $key"
-
-            elif [[ "$http_code" == "200" ]]; then
-              echo " -> URL OK (200)." # Do nothing as requested
-
-            else
-              # Handle non-200, non-3xx statuses (e.g., 404, 500) or other curl success scenarios
-              echo " -> URL check returned status $http_code. No changes made for this URL." # Do nothing as requested
-            fi
-          done # End of loop for keys
-
-          # --- Post-loop logic (Commit changes if any) ---
-          if $changes_made; then
-            echo "Changes were made. Writing updated modflix.json"
-            echo "$updated_json" | jq '.' > modflix.json # Write updated content back, pretty-printed
-            echo "changes_detected=true" >> $GITHUB_OUTPUT
-          else
-            echo "No changes detected in URLs that required updates."
-            # Restore original file to avoid timestamp changes if only formatting changed
-            # Check if backup exists before moving
-            if [ -f modflix.json.bak ]; then
-              mv modflix.json.bak modflix.json
-            fi
-            echo "changes_detected=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Commit and push changes
-        if: steps.check_urls.outputs.changes_detected == 'true'
-        run: |
-          git config --global user.name 'github-actions[bot]'
-          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
-          git add modflix.json
-          # Check if there are staged changes before committing
-          if ! git diff --staged --quiet; then
-            git commit -m "chore: Update redirected URLs in modflix.json
-
-            Automatically updated URLs based on HTTP redirects."
-            git push
-            echo "Changes committed and pushed."
-          else
-            echo "No effective changes to commit after writing file (might be formatting only)."
-          fi
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Use the default token