node.js json broken-links

How to test links in a JSON file?


I have a database in JSON containing multiple links and I cannot find a library to crawl it for valid and invalid links. I want to test each URL inside to look for broken links.

Property example:

{
      "Organisation": "British Association for Sexual Health and HIV",
      "Abréviation": "BASHH",
      "Spécialité": "infectiologie",
      "Type": "societe savante",
      "Actualités": "https://www.bashh.org/news/news/",
      "RSS": "No",
      "Publications ouvertes": "https://www.bashh.org/guidelines",
      "Publications RSS": "No",
      "Social": "https://x.com/BASHH_UK"
    },

Solution

  • I ended up building a script with the help of Codestral. It works on any JSON layout.

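    You need Node.js 16 or later and the axios package (npm install axios). Because the script uses import statements, save it with a .mjs extension or set "type": "module" in package.json.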

    import axios from 'axios'
    import fs from 'fs/promises'
    
    /**
     * Tell whether a value is an absolute http(s) link that can be requested.
     *
     * The value is validated with the built-in `URL` parser rather than a
     * hand-written pattern. The parser accepts every legal path, query and
     * fragment character (parentheses, commas, `@`, slashes inside the query, ...)
     * and avoids the catastrophic backtracking that nested-quantifier domain
     * patterns suffer on long dot-less tokens such as hashes or identifiers.
     *
     * @param {*} str - Candidate value, typically a string found in the JSON data.
     * @returns {boolean} `true` when `str` is a string that starts with `http://`
     *   or `https://`, contains no whitespace and parses to a URL with a host.
     */
    function isUrl(str) {
      // An explicit scheme is required: a bare "www.example.com" cannot be
      // requested as-is by an HTTP client, so it is not treated as a link.
      if (typeof str !== 'string' || !/^https?:\/\/\S+$/i.test(str)) {
        return false;
      }
      try {
        return new URL(str).hostname !== '';
      } catch {
        // `new URL` throws on anything it cannot parse.
        return false;
      }
    }
    
    /**
     * Request a URL and report on stderr when it cannot be fetched.
     *
     * @param {string} url - Absolute URL to request.
     * @param {{ timeout?: number }} [options] - `timeout` is the number of
     *   milliseconds to wait before giving up (default 10000).
     * @returns {Promise<boolean>} `true` when `axios.get` resolved, `false` when
     *   it rejected. Failures are logged rather than thrown.
     */
    async function testUrl(url, { timeout = 10000 } = {}) {
      try {
        // Without a timeout, a server that accepts the connection but never
        // answers would keep the whole run hanging.
        await axios.get(url, { timeout });
        return true;
      } catch (error) {
        console.error(`${url} is down. Erreur : ${error.message}`);
        return false;
      }
    }
    
    /**
     * Walk a parsed JSON value and start a check for every link found in it.
     *
     * Nested objects and arrays are visited depth-first. Each string accepted by
     * `isUrl` is passed to `testUrl`. The checks are started without waiting for
     * one another.
     *
     * @param {*} obj - Parsed JSON value (object, array, string, `null`, ...).
     * @returns {Promise<void>} Resolves once every started `testUrl` call has
     *   resolved, so callers can await the end of the run; rejects if one of
     *   those calls rejects.
     */
    function traverse(obj) {
      const pending = [];

      const visit = (value) => {
        if (typeof value === 'string') {
          if (isUrl(value)) {
            pending.push(testUrl(value));
          }
        } else if (value !== null && typeof value === 'object') {
          // Object.values only yields own enumerable properties, so nothing
          // inherited through the prototype chain is walked.
          Object.values(value).forEach((child) => visit(child));
        }
      };

      visit(obj);
      return Promise.all(pending).then(() => undefined);
    }
    
    /**
     * Read a JSON file and check every link it contains.
     *
     * @param {string} [filePath] - Location of the JSON file. A relative path is
     *   resolved against the current working directory. Defaults to
     *   `../static/data/societes-savantes.json`.
     * @returns {Promise<void>} Failures to read or parse the file are logged on
     *   stderr, not thrown.
     */
    async function main(filePath = '../static/data/societes-savantes.json') {
      try {
        const data = await fs.readFile(filePath, 'utf8');
        const jsonData = JSON.parse(data);
        // Awaiting keeps `main` pending until the checks are done when `traverse`
        // returns a promise, and is harmless when it returns nothing.
        await traverse(jsonData);
      } catch (error) {
        console.error(`Erreur lors de la lecture du fichier JSON : ${error.message}`);
      }
    }
    
    // Entry point. The promise is deliberately not awaited: main() logs its own errors.
    void main();