javascript json arraybuffer

Converting huge JSON object to Blob by stringifying "directly into" an ArrayBuffer/Blob to avoid maximum string length error


Context:

I have some code like this in my application:

let blob = new Blob([JSON.stringify(json)], {type: "application/json"});

However, it sometimes fails because in Chrome the maximum string length is ~500MB, and json can sometimes be larger than this.

Question:

I'm looking for a way to go straight from my json variable (i.e. a POJO) to a Blob, probably via some sort of streaming stringification that saves to an ArrayBuffer as it goes. Or any other way to get a large json object into a Blob without running into a 'maximum string length' error.

Notes:

Related:


Solution

  • This works:

    /**
     * Serializes a JSON-compatible value into a Blob without ever building the
     * whole document as one string, so the engine's maximum string length does
     * not apply to the output as a whole (only to each individual primitive).
     *
     * The output follows JSON.stringify for the cases handled here:
     *  - a value's own `toJSON()` is applied before it is serialized;
     *  - `undefined`, functions and symbols become `null` inside arrays (array
     *    holes included) and are omitted when they are object property values;
     *  - a value that contains itself throws, but the same object referenced
     *    from several places is serialized once per reference.
     * A root value with no JSON representation (e.g. `undefined`) yields the
     * text `null`.
     *
     * @param {*} json - The value to serialize.
     * @returns {Blob} A Blob holding the UTF-8 encoded JSON text.
     * @throws {TypeError} If the structure is circular, or if JSON.stringify
     *   rejects a primitive (e.g. a BigInt).
     */
    function jsonToBlob(json) {
      const textEncoder = new TextEncoder();
      const commaBytes = textEncoder.encode(',');
      const nullBytes = textEncoder.encode('null');

      // Holds only the objects on the current recursion path. Entries are
      // removed once an object has been fully written, so shared (non-circular)
      // references are not mistaken for cycles.
      const ancestors = new WeakSet();

      // Returns the Blob part for `value`, or `undefined` when JSON has no
      // representation for it; the caller decides between "null" and omission.
      function processValue(value) {
        const resolved = value && typeof value.toJSON === 'function' ? value.toJSON() : value;

        const kind = typeof resolved;
        if (kind === 'undefined' || kind === 'function' || kind === 'symbol') {
          return undefined;
        }
        if (kind !== 'object' || resolved === null) {
          // Primitives are small enough to go through JSON.stringify directly.
          return textEncoder.encode(JSON.stringify(resolved));
        }

        if (ancestors.has(resolved)) {
          throw new TypeError("Converting circular structure to JSON");
        }
        ancestors.add(resolved);

        const isArray = Array.isArray(resolved);
        const blobParts = [textEncoder.encode(isArray ? '[' : '{')];

        if (isArray) {
          // Index-based walk so holes in sparse arrays are emitted as null.
          for (let i = 0; i < resolved.length; i++) {
            if (i > 0) blobParts.push(commaBytes);
            const part = processValue(resolved[i]);
            blobParts.push(part === undefined ? nullBytes : part);
          }
        } else {
          for (const [key, val] of Object.entries(resolved)) {
            const part = processValue(val);
            if (part === undefined) continue; // unserializable properties are dropped
            // Index 0 is the opening brace; anything beyond it means a
            // property has already been written.
            if (blobParts.length > 1) blobParts.push(commaBytes);
            blobParts.push(textEncoder.encode(`${JSON.stringify(key)}:`), part);
          }
        }

        blobParts.push(textEncoder.encode(isArray ? ']' : '}'));
        ancestors.delete(resolved);
        return new Blob(blobParts);
      }

      const root = processValue(json);
      if (root instanceof Blob) {
        return root;
      }
      // Primitive or unserializable roots are wrapped so callers always get a Blob.
      return new Blob([root === undefined ? nullBytes : root]);
    }
    

    ✅ Test 1:

    // Round-trip a small mixed structure: nested objects and arrays, a Date,
    // an array hole and a function, then parse the result to inspect it.
    const nested = {hello: {foo: [1, 2, 3], a: 1, bar: ["a", 2, {$hi: [1, 2, 3, {a: 3}]}]}};
    const noop = () => {};
    let blob = jsonToBlob([nested, 4, new Date(), , noop]);
    const text = await blob.text();
    console.log(JSON.parse(text));
    

    ✅ Test 2:

    // Build an object with 600,000 random keys, each mapped to a long string
    // of repeated random digits, and serialize it.
    let json = {};
    let remaining = 600000;
    while (remaining-- > 0) {
      const key = Math.random();
      json[key] = String(Math.random()).repeat(100);
    }
    let blob = jsonToBlob(json);
    console.log(blob); // ~1 GB
    

    Will update this answer if I find any errors/problems when this gets to production.

    Update: One year later, the only issue I've run into with the above solution is that it's a bit slow in some cases. Here's a version that produces the exact same output but is more than 10x faster in my real-world tests:

    /**
     * Serializes a JSON-compatible value into a Blob without ever building the
     * whole document as one string.
     *
     * Text is collected in a short pending string, encoded to UTF-8 once it
     * exceeds about 1024 characters, and appended to a growable byte buffer
     * that starts at 1 MB. The complete output is held in that single
     * Uint8Array before the Blob is created.
     *
     * The output follows JSON.stringify for the cases handled here:
     *  - a value's own `toJSON()` is applied before it is serialized;
     *  - `undefined`, functions and symbols become `null` inside arrays (array
     *    holes included) and are omitted when they are object property values;
     *  - arrays are serialized by index, so non-index properties set on an
     *    array are ignored;
     *  - a value that contains itself throws, but the same object referenced
     *    from several places is serialized once per reference.
     * A root value with no JSON representation (e.g. `undefined`) yields the
     * text `null`.
     *
     * @param {*} json - The value to serialize.
     * @returns {Blob} A Blob holding the UTF-8 encoded JSON text.
     * @throws {TypeError} If the structure is circular, or if JSON.stringify
     *   rejects a primitive (e.g. a BigInt).
     */
    function jsonToBlob(json) {
      const FLUSH_THRESHOLD = 1024; // pending characters that trigger an encode
      const textEncoder = new TextEncoder();

      // Holds only the objects on the current recursion path. Entries are
      // removed once an object has been fully written, so shared (non-circular)
      // references are not mistaken for cycles.
      const ancestors = new WeakSet();

      let buffer = new Uint8Array(1024 * 1024); // Start with 1MB buffer
      let position = 0;
      let stringBuffer = '';

      // Grows the byte buffer (at least doubling it) so that `additionalBytes`
      // more bytes fit after `position`.
      function ensureCapacity(additionalBytes) {
        const required = position + additionalBytes;
        if (required <= buffer.length) return;
        const grown = new Uint8Array(Math.max(buffer.length * 2, required));
        grown.set(buffer.subarray(0, position)); // copy only the bytes in use
        buffer = grown;
      }

      // Encodes the pending text and appends it to the byte buffer.
      function flushStringBuffer() {
        if (stringBuffer.length === 0) return;
        const encoded = textEncoder.encode(stringBuffer);
        stringBuffer = '';
        ensureCapacity(encoded.length);
        buffer.set(encoded, position);
        position += encoded.length;
      }

      // Appends text to the pending string, flushing when it gets too large.
      function emit(text) {
        stringBuffer += text;
        if (stringBuffer.length > FLUSH_THRESHOLD) {
          flushStringBuffer();
        }
      }

      // Applies the value's own toJSON(), if it has one.
      function resolve(value) {
        return value && typeof value.toJSON === 'function' ? value.toJSON() : value;
      }

      // True for values JSON cannot represent.
      function isUnserializable(value) {
        const kind = typeof value;
        return kind === 'undefined' || kind === 'function' || kind === 'symbol';
      }

      // Writes a value that has already been resolved and is serializable.
      function writeValue(value) {
        if (typeof value !== 'object' || value === null) {
          emit(JSON.stringify(value));
          return;
        }

        if (ancestors.has(value)) {
          throw new TypeError("Converting circular structure to JSON");
        }
        ancestors.add(value);

        if (Array.isArray(value)) {
          emit('[');
          // Index-based walk so holes in sparse arrays are emitted as null.
          for (let i = 0; i < value.length; i++) {
            if (i > 0) emit(',');
            const element = resolve(value[i]);
            if (isUnserializable(element)) {
              emit('null');
            } else {
              writeValue(element);
            }
          }
          emit(']');
        } else {
          emit('{');
          let first = true;
          for (const [key, raw] of Object.entries(value)) {
            // Resolve before writing the key: the property is dropped entirely
            // when its value turns out to be unserializable.
            const val = resolve(raw);
            if (isUnserializable(val)) continue;
            if (!first) emit(',');
            first = false;
            emit(`${JSON.stringify(key)}:`);
            writeValue(val);
          }
          emit('}');
        }

        ancestors.delete(value);
      }

      const root = resolve(json);
      if (isUnserializable(root)) {
        emit('null');
      } else {
        writeValue(root);
      }
      flushStringBuffer();

      return new Blob([buffer.subarray(0, position)]);
    }