Sorry for the long question, but please bear with me.
I have written a TensorFlow Serving client in Java that sends requests to a TensorFlow server hosted on another machine. The communication goes over gRPC and works, in the sense that responses come back for the requests, but the responses are wrong. The model's job is to detect humans (with and without helmets) in a photo the client sends, and the model itself is fine.
So the problem has probably arisen from some bug in how I format the image, maybe in the dimensions, etc. I have spent days checking every small detail, in vain.
Moreover, I have also written a client in Python, and surprisingly it works fine: the response from the server is correct. But I need to do this in Java. So, in a nutshell, I am sending the same image to the same server with a Java client and a Python client, and getting two different results.
Here is the code for both clients:
Python-
#PYTHON_CLIENT
from __future__ import print_function

import glob

import cv2
import numpy as np
import tensorflow as tf
from grpc.beta import implementations
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2

tf.app.flags.DEFINE_string('server', '<someIPaddress>:9000', 'PredictionService host:port')
tf.app.flags.DEFINE_string('image', './', 'path to image in JPEG format')
FLAGS = tf.app.flags.FLAGS
def out(result):
    # Read each output tensor's shape, then reshape its flat float_val list
    db = []
    dc = []
    ds = []

    db.append(result.outputs['detection_boxes'].tensor_shape.dim[0].size)
    db.append(result.outputs['detection_boxes'].tensor_shape.dim[1].size)
    db.append(result.outputs['detection_boxes'].tensor_shape.dim[2].size)
    detection_boxes = np.asarray(result.outputs['detection_boxes'].float_val)
    detection_boxes = detection_boxes.reshape([db[0], db[1], db[2]])
    print(detection_boxes)

    dc.append(result.outputs['detection_classes'].tensor_shape.dim[0].size)
    dc.append(result.outputs['detection_classes'].tensor_shape.dim[1].size)
    detection_classes = np.asarray(result.outputs['detection_classes'].float_val)
    detection_classes = detection_classes.reshape([dc[0], dc[1]])
    print(detection_classes)

    ds.append(result.outputs['detection_scores'].tensor_shape.dim[0].size)
    ds.append(result.outputs['detection_scores'].tensor_shape.dim[1].size)
    detection_scores = np.asarray(result.outputs['detection_scores'].float_val)
    detection_scores = detection_scores.reshape([ds[0], ds[1]])
    print(detection_scores)

    return detection_classes, detection_scores, detection_boxes
def main(_):
    host, port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Create the prediction request object
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'deeplab'
    request.model_spec.signature_name = 'predict_images'

    # Load the image(s) into a [batch, height, width, channels] array
    image_data = []
    for image in glob.glob(FLAGS.image + 'cde.jpg'):
        image = cv2.imread(image)
        image = image.astype('f')
        image_data.append(image)
    image_data2 = np.asarray(image_data)

    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image_data2, dtype=tf.uint8, shape=None))
    result = stub.Predict(request, 10.0)  # 10 secs timeout

    print(result.outputs)
    m, n, p = out(result)  # detection_classes, detection_scores, detection_boxes
    category_index = label_map_util.create_category_index_from_labelmap(
        '/home/<somePathHere>/labels.pbtxt', use_display_name=True)

    # Visualization of the results of a detection (first image in the batch)
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_data2[0],
        p[0],
        m[0].astype(np.int32),
        n[0],
        category_index,
        min_score_thresh=.5,
        use_normalized_coordinates=True,
        line_thickness=8,
    )


if __name__ == '__main__':
    tf.app.run()
Java-
//JAVA_CLIENT
public static void main(String[] args) {
    String host = "<someIPaddress>";
    int port = 9000;
    String modelName = "deeplab";
    long modelVersion = 1;

    // Run predict client to send the request
    PredictClientt_One client = new PredictClientt_One(host, port);
    try {
        client.do_predict(modelName, modelVersion);
    } catch (Exception e) {
        System.out.println(e);
    } finally {
        try {
            client.shutdown();
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}

public void shutdown() throws InterruptedException {
    channel.shutdown().awaitTermination(5, TimeUnit.SECONDS);
}

public void do_predict(String modelName, long modelVersion) {
    // Convert the image file to a [1][1080][1920][3] array
    int[][][][] featuresTensorData = new int[1][1080][1920][3];
    String[] imageFilenames = new String[]{"./cde.jpg"};
    for (int i = 0; i < imageFilenames.length; i++) {
        File imageFile = new File(imageFilenames[i]);
        try {
            BufferedImage preImage = ImageIO.read(imageFile);
            BufferedImage image = new BufferedImage(preImage.getWidth(), preImage.getHeight(),
                    BufferedImage.TYPE_INT_ARGB); // convert to ARGB
            image.getGraphics().drawImage(preImage, 0, 0, null);
            logger.info("Start to convert the image: " + imageFile.getPath());

            int imageWidth = 1920;
            int imageHeight = 1080;
            for (int row = 0; row < imageHeight; row++) {
                for (int column = 0; column < imageWidth; column++) {
                    Color col = new Color(image.getRGB(column, row));
                    // tried every combination of red, green and blue in [0], [1] and [2]
                    featuresTensorData[i][row][column][0] = col.getBlue();
                    featuresTensorData[i][row][column][1] = col.getGreen();
                    featuresTensorData[i][row][column][2] = col.getRed();
                }
            }
        } catch (IOException e) {
            logger.log(Level.WARNING, e.getMessage());
            System.exit(1);
        }
    }

    // Generate the features TensorProto
    TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
    for (int i = 0; i < featuresTensorData.length; ++i) {
        for (int j = 0; j < featuresTensorData[i].length; ++j) {
            for (int k = 0; k < featuresTensorData[i][j].length; ++k) {
                for (int l = 0; l < featuresTensorData[i][j][k].length; ++l) {
                    featuresTensorBuilder.addFloatVal(featuresTensorData[i][j][k][l]);
                }
            }
        }
    }
    TensorShapeProto.Dim featuresDim1 = TensorShapeProto.Dim.newBuilder().setSize(1).build();
    TensorShapeProto.Dim featuresDim2 = TensorShapeProto.Dim.newBuilder().setSize(1080).build();
    TensorShapeProto.Dim featuresDim3 = TensorShapeProto.Dim.newBuilder().setSize(1920).build();
    TensorShapeProto.Dim featuresDim4 = TensorShapeProto.Dim.newBuilder().setSize(3).build();
    TensorShapeProto featuresShape = TensorShapeProto.newBuilder()
            .addDim(featuresDim1).addDim(featuresDim2).addDim(featuresDim3).addDim(featuresDim4).build();
    featuresTensorBuilder.setDtype(org.tensorflow.framework.DataType.DT_UINT8).setTensorShape(featuresShape);
    TensorProto featuresTensorProto = featuresTensorBuilder.build();

    // Generate the gRPC request
    com.google.protobuf.Int64Value version = com.google.protobuf.Int64Value.newBuilder().setValue(modelVersion).build();
    Model.ModelSpec modelSpec = Model.ModelSpec.newBuilder().setName(modelName).setVersion(version).build();
    Predict.PredictRequest request = Predict.PredictRequest.newBuilder()
            .setModelSpec(modelSpec).putInputs("inputs", featuresTensorProto).build();

    // Call the gRPC server and print every output tensor
    Predict.PredictResponse response;
    try {
        response = blockingStub.predict(request);
        java.util.Map<java.lang.String, org.tensorflow.framework.TensorProto> outputs = response.getOutputsMap();
        for (java.util.Map.Entry<java.lang.String, org.tensorflow.framework.TensorProto> entry : outputs.entrySet()) {
            System.out.println("Key: " + entry.getKey() + ",\nValue: " + entry.getValue());
        }
    } catch (StatusRuntimeException e) {
        logger.log(Level.WARNING, "RPC failed: {0}", e.getStatus());
        return;
    }
}
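(For completeness: the snippet above uses channel, blockingStub and logger without showing how they are built. A minimal sketch of the omitted setup, assuming a reasonably recent grpc-java and the usual generated PredictionServiceGrpc stub; the exact constructor in my class may differ:)

private final ManagedChannel channel;
private final PredictionServiceGrpc.PredictionServiceBlockingStub blockingStub;
private static final Logger logger = Logger.getLogger(PredictClientt_One.class.getName());

public PredictClientt_One(String host, int port) {
    // Plaintext channel, matching the insecure_channel used by the Python client
    channel = ManagedChannelBuilder.forAddress(host, port).usePlaintext().build();
    blockingStub = PredictionServiceGrpc.newBlockingStub(channel);
}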
The response from the server comes as a hash map (a dictionary) containing four key-value pairs:
{
'detection_scores': <some value>,
'detection_boxes': <some value>,
'detection_classes': <some value>,
'num_detections': <some value>
}
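Each <some value> is itself a TensorProto. On the Java side the individual tensors can be pulled out of this map; a minimal sketch for the scores, using the response and the same generated classes as in the client above:

TensorProto scoresProto = response.getOutputsMap().get("detection_scores");
// float_val holds the flattened [1, num_detections] scores tensor
java.util.List<Float> scores = scoresProto.getFloatValList();
for (int i = 0; i < scores.size(); i++) {
    System.out.println("detection " + i + " score: " + scores.get(i));
}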
The Python client's 'detection_scores' run like: 0.9..., 0.8..., 0.7..., 0.1..., 0.04... (so three humans detected above the 0.5 threshold).
The Java client's 'detection_scores' for the same photo start at 0.005..., and all of its bounding boxes sit at the far left of the photo, whereas the Python client's boxes land on the human faces.
Please help, and thank you for reading patiently!
I am answering my own question, as I just figured out the solution.
All I needed to fix was to change addFloatVal() to addIntVal(). Here:
TensorProto.Builder featuresTensorBuilder = TensorProto.newBuilder();
for (int i = 0; i < featuresTensorData.length; ++i) {
    for (int j = 0; j < featuresTensorData[i].length; ++j) {
        for (int k = 0; k < featuresTensorData[i][j].length; ++k) {
            for (int l = 0; l < featuresTensorData[i][j][k].length; ++l) {
                featuresTensorBuilder.addIntVal(featuresTensorData[i][j][k][l]); // was addFloatVal(...)
            }
        }
    }
}
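The reason: the tensor is declared as DT_UINT8, and for integer dtypes the TensorProto stores its values in the int_val field, not float_val, so the server presumably ignored everything I was adding and saw an empty tensor. As a side note, for DT_UINT8 it should also be possible to skip the per-value addIntVal() calls and pack the raw bytes into tensor_content instead (a sketch, not tested; the field and builder names come from the standard generated TensorProto classes):

// Sketch: pack the uint8 pixels directly into tensor_content as raw bytes
byte[] pixels = new byte[1 * 1080 * 1920 * 3];
int idx = 0;
for (int row = 0; row < 1080; row++) {
    for (int column = 0; column < 1920; column++) {
        for (int c = 0; c < 3; c++) {
            pixels[idx++] = (byte) featuresTensorData[0][row][column][c];
        }
    }
}
TensorProto featuresTensorProto = TensorProto.newBuilder()
        .setDtype(org.tensorflow.framework.DataType.DT_UINT8)
        .setTensorShape(featuresShape)
        .setTensorContent(com.google.protobuf.ByteString.copyFrom(pixels))
        .build();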
Such a small fix, and I wasted two whole days trying everything I could! Sad.