Tags: javascript, video, webrtc, html5-video, video-processing

Do Insertable Streams feed an already-transformed VideoFrame back in as input?


Let's say we have:

// Reads VideoFrames off the input track.
// NOTE(review): `tracks` is presumably a single MediaStreamTrack — the
// property is named `track` (singular) — TODO confirm.
const trackProcessor = new MediaStreamTrackProcessor({track: tracks})
// Generates a brand-new video MediaStreamTrack from whatever frames we enqueue.
const trackGenerator = new MediaStreamTrackGenerator({kind: 'video'})

const transformer = new TransformStream({
   async transform(videoFrame, controller) {
      ... some frame tranformation
      // Close the input frame to release its media resource, then forward
      // the transformed copy downstream.
      videoFrame.close()
      controller.enqueue(newFrame)
  }
})

// Pipeline: processor (reader) -> transformer -> generator (writer).
trackProcessor.readable.pipeThrough(transformer).pipeTo(trackGenerator.writable)

In this example, does the (n+1)-th videoFrame receive the current newFrame as its input? If so, the same transformation would then be applied to an already-transformed frame...

For example, if the transformation is a blur, the blur would be applied multiple times (every time the transformer is called).

How do you deal with this if you want to apply the blur only once?


Solution

  • I'm not 100% sure I understand your question, but given your setup, the transformation is applied only once per frame.

    The input track is not modified; your generator is its own MediaStreamTrack.
    You can see in the snippet below — where I generate a click-controlled, frame-by-frame stream — that the input is indeed not modified and that the transformation is applied only once per new frame.

    // Reusable offscreen 2D context on which each frame's blurred copy is drawn.
    const blurringCtx = new OffscreenCanvas(300, 150).getContext("2d");
    // Click-driven source: `track` emits one new frame each time next() runs.
    const { track, next } = getTracks();
    const trackProcessor = new MediaStreamTrackProcessor({track})
    const trackGenerator = new MediaStreamTrackGenerator({kind: 'video'})
    
    // Blurs every incoming frame exactly once, then forwards the result.
    const transformer = new TransformStream({
       async transform(videoFrame, controller) {
          const newFrame = blur(videoFrame);
          // Close the source frame promptly to release its media resource.
          videoFrame.close();
          controller.enqueue(newFrame)
      }
    })
    
    // Pipeline: processor (reader) -> transformer -> generator (a new,
    // independent MediaStreamTrack; the input track is never modified).
    trackProcessor.readable
      .pipeThrough(transformer)
      .pipeTo(trackGenerator.writable)
    
    const [videoIn, videoOut] = document.querySelectorAll("video");
    // Left video shows the untouched input; right video shows the generator.
    videoIn.srcObject = new MediaStream([track]);
    videoOut.srcObject = new MediaStream([trackGenerator]);
    
    document.querySelector("button").onclick = () => next();
    // Produce the first frame immediately.
    next();
    
    // Builds a click-controlled video source: a captureStream(0) track plus a
    // next() callback that paints a numbered frame and pushes it on the track.
    function getTracks() {
      const canvas = document.createElement("canvas");
      const ctx = canvas.getContext("2d");
      ctx.font = "50px sans-serif";
      ctx.textAlign = "center";
      ctx.textBaseline = "middle";
      let frameCount = 0;
      // frameRate 0: frames are only emitted on explicit requestFrame() calls.
      const [track] = canvas.captureStream(0).getTracks();
      function next() {
        frameCount += 1;
        ctx.clearRect(0, 0, 300, 150);
        ctx.fillText(`frame #${frameCount}`, 150, 75);
        track.requestFrame();
      }
      return { track, next };
    }
    
    // Draws `frame` through a CSS blur filter onto the shared offscreen canvas
    // and wraps the result in a new VideoFrame that reuses the source's
    // metadata (timestamp etc.) via the `frame` init argument.
    function blur(frame) {
      console.log("blurring a new frame");
      const ctx = blurringCtx;
      ctx.reset();
      ctx.filter = "blur(5px)";
      ctx.drawImage(frame, 0, 0);
      return new VideoFrame(ctx.canvas, frame);
    }
    <button>draw next frame</button>
    <section style="display: flex; gap: 10px">
      <p>input:<br><video controls autoplay muted></video></p>
      <p>output:<br><video controls autoplay muted></video></p>
    </section>

    Now, if you want to transform only a single frame in the whole stream, or only a few, you can simply enqueue the input frame directly whenever you don't want to apply the transformation:

    // Reusable offscreen 2D context on which blurred frames are drawn.
    const blurringCtx = new OffscreenCanvas(300, 150).getContext("2d");
    // Click-driven source: `track` emits one new frame each time next() runs.
    const { track, next } = getTracks();
    const trackProcessor = new MediaStreamTrackProcessor({track})
    const trackGenerator = new MediaStreamTrackGenerator({kind: 'video'})
    
    const transformer = new TransformStream({
       async transform(videoFrame, controller) {
          // The checkbox decides, per frame, whether to blur or pass through.
          if (document.querySelector("input").checked) {
            const newFrame = blur(videoFrame);
            // Close the input frame only when we replaced it with a new one.
            videoFrame.close();
            controller.enqueue(newFrame)        
          }
          else {
            // Pass-through: enqueue the untouched input frame (do NOT close
            // it here — the generator consumes it downstream).
            controller.enqueue(videoFrame);
          }
      }
    })
    
    // Pipeline: processor (reader) -> transformer -> generator (new track).
    trackProcessor.readable
      .pipeThrough(transformer)
      .pipeTo(trackGenerator.writable)
    
    const [videoIn, videoOut] = document.querySelectorAll("video");
    // Left video shows the untouched input; right video shows the generator.
    videoIn.srcObject = new MediaStream([track]);
    videoOut.srcObject = new MediaStream([trackGenerator]);
    
    document.querySelector("button").onclick = () => next();
    // Produce the first frame immediately.
    next();
    
    // Click-driven frame source: returns { track, next }, where next() paints
    // a numbered frame and requests it on the zero-fps captured track.
    function getTracks() {
      const canvas = document.createElement("canvas");
      const ctx = canvas.getContext("2d");
      ctx.font = "50px sans-serif";
      ctx.textAlign = "center";
      ctx.textBaseline = "middle";
      // frameRate 0: frames are only emitted on explicit requestFrame() calls.
      const [track] = canvas.captureStream(0).getTracks();
      let counter = 0;
      const next = () => {
        ctx.clearRect(0, 0, 300, 150);
        counter += 1;
        ctx.fillText(`frame #${counter}`, 150, 75);
        track.requestFrame();
      };
      return { track, next };
    }
    
    // Renders `frame` blurred onto the shared offscreen canvas, then wraps the
    // canvas content in a fresh VideoFrame that reuses the source's metadata.
    function blur(frame) {
      const ctx = blurringCtx;
      ctx.reset();
      ctx.filter = "blur(5px)";
      ctx.drawImage(frame, 0, 0);
      return new VideoFrame(ctx.canvas, frame);
    }
    <button>draw next frame</button> <label>Apply blur<input type="checkbox" checked></label>
    <section style="display: flex; gap: 10px">
      <p>input:<br><video controls autoplay muted></video></p>
      <p>output:<br><video controls autoplay muted></video></p>
    </section>