react-native, tensorflow, tensorflow.js

Making predictions on live video feed using React Native and Tensorflow.js


I have set up my React Native app, completed all the installations and configurations of unimodules, and the packages are working as expected. No problems with dependencies, etc.

Now I want to implement a TensorFlow model that I've trained with Google's Teachable Machine, and I can't figure out how to use it with the camera, because I'd like to process the frames in real time, just as the TensorFlow React Native API docs describe. Below is code I found online; I will swap in my own model, but the problem is that it only runs the model when the user takes a picture. I want my camera to run the model in real time, just like face detection or a barcode scanner.

Main.js

import React, {useRef, useEffect, useState} from 'react';
import {View, StyleSheet, Dimensions, Platform} from 'react-native';

import {
  getModel,
  convertBase64ToTensor,
  startPrediction,
} from '../../helpers/tensor-helper';

import {Camera} from 'expo-camera';

import * as tf from '@tensorflow/tfjs';
import '@tensorflow/tfjs-react-native';
import {
  cameraWithTensors,
  bundleResourceIO,
} from '@tensorflow/tfjs-react-native';

const TensorCamera = cameraWithTensors(Camera);

const Main = () => {
  const [model, setModel] = useState();
  const [prediction, setPredictions] = useState();
  const cameraRef = useRef(null);

  let requestAnimationFrameId = 0;
  let frameCount = 0;
  let makePredictionsEveryNFrame = 1;

  const modelJson = require('../../model/model.json');
  const modelWeights = require('../../model/weights.bin');

  const getModel = async () => {
    try {
      await tf.ready();
      const model = await tf.loadLayersModel(
        bundleResourceIO(modelJson, modelWeights),
      );
      return model;
    } catch (error) {
      console.log('Could not load model', error);
    }
  };
  useEffect(() => {
    setModel(getModel());
  }, []);

  useEffect(() => {
    return () => {
      cancelAnimationFrame(requestAnimationFrameId);
    };
  }, [requestAnimationFrameId]);

  const handleCameraStream = tensors => {
    if (!tensors) {
      console.log('Image not found!');
    }
    const loop = async () => {
      if (frameCount % makePredictionsEveryNFrame === 0) {
        const imageTensor = tensors.next().value;
        if (model) {
          const results = await startPrediction(model, imageTensor);
          setPredictions(results);
          console.log(`prediction: ${JSON.stringify(prediction)}`);
        }
        tf.dispose(tensors);
      }
      frameCount += 1;
      frameCount = frameCount % makePredictionsEveryNFrame;
      requestAnimationFrameId = requestAnimationFrame(loop);
    };
    console.log(`prediction: ${JSON.stringify(prediction)}`);

    loop();
    console.log(`prediction: ${JSON.stringify(prediction)}`);
  };

  let textureDims;
  if (Platform.OS === 'ios') {
    textureDims = {
      height: 1920,
      width: 1080,
    };
  } else {
    textureDims = {
      height: 1200,
      width: 1600,
    };
  }
  return (
    <View style={styles.container}>
      <TensorCamera
        ref={cameraRef}
        // Standard Camera props
        style={styles.camera}
        type={Camera.Constants.Type.back}
        flashMode={Camera.Constants.FlashMode.off}
        // Tensor related props
        cameraTextureHeight={textureDims.height}
        cameraTextureWidth={textureDims.width}
        resizeHeight={50}
        resizeWidth={50}
        resizeDepth={3}
        onReady={tensors => handleCameraStream(tensors)}
        autorender={true}
      />
    </View>
  );
};



export default Main;

tensor-helper.js:

import * as tf from '@tensorflow/tfjs';
import {bundleResourceIO, decodeJpeg} from '@tensorflow/tfjs-react-native';
import * as tfc from '@tensorflow/tfjs-core';

import {Base64Binary} from '../utils/utils';
const BITMAP_DIMENSION = 224;

const modelJson = require('../model/model.json');
const modelWeights = require('../model/weights.bin');

// 0: channel from JPEG-encoded image
// 1: gray scale
// 3: RGB image
const TENSORFLOW_CHANNEL = 3;

export const getModel = async () => {
  try {
    await tf.ready();
    const model = await tf.loadLayersModel(
      bundleResourceIO(modelJson, modelWeights),
    );
    return model;
  } catch (error) {
    console.log('Could not load model', error);
  }
};

export const convertBase64ToTensor = async base64 => {
  try {
    const uIntArray = Base64Binary.decode(base64);
    // decode a JPEG-encoded image to a 3D Tensor of dtype
    const decodedImage = decodeJpeg(uIntArray, 3);
    // reshape Tensor into a 4D array
    return decodedImage.reshape([
      1,
      BITMAP_DIMENSION,
      BITMAP_DIMENSION,
      TENSORFLOW_CHANNEL,
    ]);
  } catch (error) {
    console.log('Could not convert base64 string to tensor', error);
  }
};

export const startPrediction = async (model, tensor) => {
  try {
    // predict against the model
    const output = await model.predict(tensor);
    // return typed array

    return tfc.tensor().dataSync();
  } catch (error) {
    console.log('Error predicting from tensor image', error);
  }
};

I edited the files and got this output:

 LOG  prediction: undefined
 LOG  prediction: undefined
 WARN  Possible Unhandled Promise Rejection (id: 1):
Error: When using targetShape.depth=3, targetShape.width must be a multiple of 4. Alternatively do not call detectGLCapabilities()
fromTexture@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:267911:24
nextFrameGenerator$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:268598:67  
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
loop$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126503:43
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26610:30
http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26640:19
tryCallTwo@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31390:9
doResolve@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31554:25
Promise@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31413:14
callInvokeWithMethodAndArg@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26639:33
enqueue@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26644:157
async@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26661:69
loop@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126494:42
handleCameraStream@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126535:11   
onReady@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126572:34
onGLContextCreate$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:268641:37   
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32   
__callImmediates@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3317:35       
http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3096:34
__guard@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3300:15
flushedQueue@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3095:21
flushedQueue@[native code]
invokeCallbackAndReturnFlushedQueue@[native code]

Solution

  • Ok so I did this a while back (last year), so I might have forgotten something, but you can just refer to the code here; it uses Expo and makes predictions on a live video feed. Just pardon the really bad code (I write better code now).

    Anyway, this is a simple update on what you need to do, which is mainly about handleCameraStream(). You will need to run two different useEffect hooks: one for initially loading the model, and the other for cancelling the animation frames, which you will need when making predictions continuously.

    Set the model into state; then you can access it via model from any part of the file. I did the same for the predictions.

    I have also added the ability to make predictions every N frames: setting makePredictionsEveryNFrames to 1 passes the tensors from TensorCamera to the prediction function on every single frame. After making predictions you will also want to dispose of the tensors using tf.dispose(). The loop() function needs to run indefinitely to make predictions on incoming frames continuously.

    const Main = () => {
      const [model, setModel] = useState();
      const [predictions, setPredictions] = useState();
    
      let requestAnimationFrameId = 0;
      let frameCount = 0;
      let makePredictionsEveryNFrames = 1;
    
      useEffect(() => {
        // load the model once on mount; await inside an inner async
        // function, since the effect callback itself cannot be async
        const loadModel = async () => {
          setModel(await getModel());
        };
        loadModel();
      }, []);
    
      useEffect(() => {
        // cancel any pending animation frame on unmount
        return () => {
          cancelAnimationFrame(requestAnimationFrameId);
        };
      }, [requestAnimationFrameId]);
    
      const handleCameraStream = (tensors) => {
        if (!tensors) {
          console.log("Image not found!");
        }
        const loop = async () => {
          if (frameCount % makePredictionsEveryNFrames === 0) {
            const imageTensor = tensors.next().value;
            if (model) {
              const results = await startPrediction(model, imageTensor);
              setPredictions(results);
            }
            // dispose of the frame's tensor once we are done with it
            tf.dispose(imageTensor);
          }
          frameCount += 1;
          frameCount = frameCount % makePredictionsEveryNFrames;
          requestAnimationFrameId = requestAnimationFrame(loop);
        };
        loop();
      };
    };
    

    I updated getModel() to return the model when it is loaded; this way we can set it in state.

    export const getModel = async () => {
      try {
        await tf.ready();
        const model = await tf.loadLayersModel(
          bundleResourceIO(modelJson, modelWeights)
        );
        return model;
      } catch (error) {
        console.log("Could not load model", error);
      }
    };
    

    So you would then just need to access the predictions from state and render them, e.g. as in the sketch below.
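
    A minimal sketch of showing the highest-scoring class, assuming a hypothetical labels array that matches your Teachable Machine classes (and that Text is imported from react-native):

    // hypothetical class names; use the ones from your own model, in training order
    const labels = ['class_1', 'class_2'];
    
    const renderPrediction = () => {
      if (!predictions) return null;
      // predictions is the typed array returned by dataSync():
      // one score per class, so pick the index with the highest score
      const values = Array.from(predictions);
      const maxIndex = values.indexOf(Math.max(...values));
      return (
        <Text>
          {labels[maxIndex]}: {(values[maxIndex] * 100).toFixed(1)}%
        </Text>
      );
    };
    

    You could then call {renderPrediction()} inside the <View> returned by Main.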

    Edit 1:

    Looking back at the code, there are some issues with the startPrediction function: you were not actually returning the predictions from the model, and you need to make predictions on a single batch of images at a time.

    export const startPrediction = async (model, tensor) => {
      try {
        // predict against the model
        const output = await model.predict(tensor, {batchSize: 1});
        return output.dataSync(); 
      } catch (error) {
        console.log('Error predicting from tensor image', error);
      }
    };
    

    Edit 2:

    Looking at the model input shape here, the expected input shape is (batch_size, 224, 224, 3), but you are passing in an image of (batch_size, 50, 50, 3). So update the parameters resizeWidth and resizeHeight to 224. This also gets rid of the "targetShape.width must be a multiple of 4" error, since 224 is a multiple of 4 while 50 is not.

    <TensorCamera
            ref={cameraRef}
            // Standard Camera props
            style={styles.camera}
            type={Camera.Constants.Type.back}
            flashMode={Camera.Constants.FlashMode.off}
            // Tensor related props
            cameraTextureHeight={textureDims.height}
            cameraTextureWidth={textureDims.width}
            resizeHeight={224}
            resizeWidth={224}
            resizeDepth={3}
            onReady={tensors => handleCameraStream(tensors)}
            autorender={true}
          />
    

    In addition to that, you will need to convert the 3D tensor to a 4D tensor before passing it to the model for predictions, also known as expanding one of the dimensions. Update the handleCameraStream function as well: the size of the tensor coming from the camera is (224, 224, 3), and after expanding the first dimension it becomes (1, 224, 224, 3).

    const handleCameraStream = (tensors) => {
      if (!tensors) {
        console.log("Image not found!");
      }
      const loop = async () => {
        if (frameCount % makePredictionsEveryNFrames === 0) {
          const imageTensor = tensors.next().value;
          if (model) {
            // expand (224, 224, 3) to (1, 224, 224, 3) to form a batch of one
            const imageTensorReshaped = imageTensor.expandDims(0);
            const results = await startPrediction(model, imageTensorReshaped);
            setPredictions(results);
            tf.dispose(imageTensorReshaped);
          }
          tf.dispose(imageTensor);
        }
        frameCount += 1;
        frameCount = frameCount % makePredictionsEveryNFrames;
        requestAnimationFrameId = requestAnimationFrame(loop);
      };
      loop();
    };