I am trying to create a custom handler on Torchserve.
The custom handler has been modified as follows
# custom handler file
# model_handler.py
"""
ModelHandler defines a custom model handler.
"""
import os
import soundfile
from espnet2.bin.enh_inference import *
from ts.torch_handler.base_handler import BaseHandler
class ModelHandler(BaseHandler):
"""
A custom model handler implementation.
"""
def __init__(self):
self._context = None
self.initialized = False
self.model = None
self.device = None
def initialize(self, context):
"""
Invoke by torchserve for loading a model
:param context: context contains model server system properties
:return:
"""
# load the model
self.manifest = context.manifest
properties = context.system_properties
model_dir = properties.get("model_dir")
self.device = torch.device("cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")
# Read model serialize/pt file
serialized_file = self.manifest['model']['serializedFile']
model_pt_path = os.path.join(model_dir, serialized_file)
if not os.path.isfile(model_pt_path):
raise RuntimeError("Missing the model.pt file")
self.model = SeparateSpeech("train_enh_transformer_tf.yaml", "valid.loss.best.pth")
self.initialized = True
def preprocess(self,data):
audio_data, rate = soundfile.read(data)
preprocessed_data = audio_data[np.newaxis, :]
return preprocessed_data
def inference(self, model_input):
model_output = self.model(model_input)
return model_output
def postprocess(self, inference_output):
"""
Return inference result.
:param inference_output: list of inference output
:return: list of predict results
"""
# Take output from network and post-process to desired format
postprocess_output = inference_output
#wav ni suru
return postprocess_output
def handle(self, data, context):
model_input = self.preprocess(data)
model_output = self.inference(model_input)
return self.postprocess(model_output)
The torchserve appears to be working.
The Torchserve logs are as follows
root@5c780ba74916:~/files# torchserve --start --ncs --model-store model_store --models denoise_transformer=denoise_transformer.mar
root@5c780ba74916:~/files# WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance.
2022-08-24T14:06:06,662 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Initializing plugins manager...
2022-08-24T14:06:06,796 [INFO ] main org.pytorch.serve.ModelServer -
Torchserve version: 0.6.0
TS Home: /usr/local/lib/python3.8/dist-packages
Current directory: /root/files
Temp directory: /tmp
Number of GPUs: 1
Number of CPUs: 16
Max heap size: 4002 M
Python executable: /usr/bin/python3
Config file: N/A
Inference address: http://127.0.0.1:8080
Management address: http://127.0.0.1:8081
Metrics address: http://127.0.0.1:8082
Model Store: /root/files/model_store
Initial Models: denoise_transformer=denoise_transformer.mar
Log dir: /root/files/logs
Metrics dir: /root/files/logs
Netty threads: 0
Netty client threads: 0
Default workers per model: 1
Blacklist Regex: N/A
Maximum Response Size: 6553500
Maximum Request Size: 6553500
Limit Maximum Image Pixels: true
Prefer direct buffer: false
Allowed Urls: [file://.*|http(s)?://.*]
Custom python dependency for model allowed: false
Metrics report format: prometheus
Enable metrics API: true
Workflow Store: /root/files/model_store
Model config: N/A
2022-08-24T14:06:06,805 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Loading snapshot serializer plugin...
2022-08-24T14:06:06,817 [INFO ] main org.pytorch.serve.ModelServer - Loading initial models: denoise_transformer.mar
2022-08-24T14:06:07,006 [DEBUG] main org.pytorch.serve.wlm.ModelVersionedRefs - Adding new version 1.0 for model denoise_transformer
2022-08-24T14:06:07,007 [DEBUG] main org.pytorch.serve.wlm.ModelVersionedRefs - Setting default version to 1.0 for model denoise_transformer
2022-08-24T14:06:07,007 [INFO ] main org.pytorch.serve.wlm.ModelManager - Model denoise_transformer loaded.
2022-08-24T14:06:07,007 [DEBUG] main org.pytorch.serve.wlm.ModelManager - updateModel: denoise_transformer, count: 1
2022-08-24T14:06:07,015 [DEBUG] W-9000-denoise_transformer_1.0 org.pytorch.serve.wlm.WorkerLifeCycle - Worker cmdline: [/usr/bin/python3, /usr/local/lib/python3.8/dist-packages/ts/model_service_worker.py, --sock-type, unix, --sock-name, /tmp/.ts.sock.9000]
2022-08-24T14:06:07,016 [INFO ] main org.pytorch.serve.ModelServer - Initialize Inference server with: EpollServerSocketChannel.
2022-08-24T14:06:07,059 [INFO ] main org.pytorch.serve.ModelServer - Inference API bind to: http://127.0.0.1:8080
2022-08-24T14:06:07,059 [INFO ] main org.pytorch.serve.ModelServer - Initialize Management server with: EpollServerSocketChannel.
2022-08-24T14:06:07,060 [INFO ] main org.pytorch.serve.ModelServer - Management API bind to: http://127.0.0.1:8081
2022-08-24T14:06:07,060 [INFO ] main org.pytorch.serve.ModelServer - Initialize Metrics server with: EpollServerSocketChannel.
2022-08-24T14:06:07,062 [INFO ] main org.pytorch.serve.ModelServer - Metrics API bind to: http://127.0.0.1:8082
Model server started.
2022-08-24T14:06:07,258 [WARN ] pool-3-thread-1 org.pytorch.serve.metrics.MetricCollector - worker pid is not available yet.
2022-08-24T14:06:07,363 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - Listening on port: /tmp/.ts.sock.9000
2022-08-24T14:06:07,364 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - [PID]6258
2022-08-24T14:06:07,364 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - Torch worker started.
2022-08-24T14:06:07,365 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - Python runtime: 3.8.10
2022-08-24T14:06:07,365 [DEBUG] W-9000-denoise_transformer_1.0 org.pytorch.serve.wlm.WorkerThread - W-9000-denoise_transformer_1.0 State change null -> WORKER_STARTED
2022-08-24T14:06:07,368 [INFO ] W-9000-denoise_transformer_1.0 org.pytorch.serve.wlm.WorkerThread - Connecting to: /tmp/.ts.sock.9000
2022-08-24T14:06:07,374 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - Connection accepted: /tmp/.ts.sock.9000.
2022-08-24T14:06:07,376 [INFO ] W-9000-denoise_transformer_1.0 org.pytorch.serve.wlm.WorkerThread - Flushing req. to backend at: 1661317567376
2022-08-24T14:06:07,398 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - model_name: denoise_transformer, batchSize: 1
2022-08-24T14:06:07,596 [INFO ] pool-3-thread-1 TS_METRICS - CPUUtilization.Percent:0.0|#Level:Host|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,596 [INFO ] pool-3-thread-1 TS_METRICS - DiskAvailable.Gigabytes:220.49971389770508|#Level:Host|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,597 [INFO ] pool-3-thread-1 TS_METRICS - DiskUsage.Gigabytes:17.66714859008789|#Level:Host|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,597 [INFO ] pool-3-thread-1 TS_METRICS - DiskUtilization.Percent:7.4|#Level:Host|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,597 [INFO ] pool-3-thread-1 TS_METRICS - GPUMemoryUtilization.Percent:17.9931640625|#Level:Host,device_id:0|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,597 [INFO ] pool-3-thread-1 TS_METRICS - GPUMemoryUsed.Megabytes:1474|#Level:Host,device_id:0|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,598 [INFO ] pool-3-thread-1 TS_METRICS - GPUUtilization.Percent:8|#Level:Host,device_id:0|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,598 [INFO ] pool-3-thread-1 TS_METRICS - MemoryAvailable.Megabytes:14307.53515625|#Level:Host|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,598 [INFO ] pool-3-thread-1 TS_METRICS - MemoryUsed.Megabytes:1372.1640625|#Level:Host|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:07,598 [INFO ] pool-3-thread-1 TS_METRICS - MemoryUtilization.Percent:10.6|#Level:Host|#hostname:5c780ba74916,timestamp:1661317567
2022-08-24T14:06:08,306 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - encoder self-attention layer type = self-attention
2022-08-24T14:06:08,328 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - Eps is deprecated in si_snr loss, set clamp_db instead.
2022-08-24T14:06:08,373 [INFO ] W-9000-denoise_transformer_1.0-stdout MODEL_LOG - Perform direct speech enhancement on the input
2022-08-24T14:06:08,390 [INFO ] W-9000-denoise_transformer_1.0 org.pytorch.serve.wlm.WorkerThread - Backend response time: 992
2022-08-24T14:06:08,391 [DEBUG] W-9000-denoise_transformer_1.0 org.pytorch.serve.wlm.WorkerThread - W-9000-denoise_transformer_1.0 State change WORKER_STARTED -> WORKER_MODEL_LOADED
2022-08-24T14:06:08,391 [INFO ] W-9000-denoise_transformer_1.0 TS_METRICS - W-9000-denoise_transformer_1.0.ms:1380|#Level:Host|#hostname:5c780ba74916,timestamp:1661317568
2022-08-24T14:06:08,391 [INFO ] W-9000-denoise_transformer_1.0 TS_METRICS - WorkerThreadTime.ms:23|#Level:Host|#hostname:5c780ba74916,timestamp:1661317568
I sent the wav file to torchserve with the following command
curl http://127.0.0.1:8080/predictions/denoise_transformer -T Mix.wav
However, the following error was returned
<HTML><HEAD>
<TITLE>Request Error</TITLE>
</HEAD>
<BODY>
<FONT face="Helvetica">
<big><strong></strong></big><BR>
</FONT>
<blockquote>
<TABLE border=0 cellPadding=1 width="80%">
<TR><TD>
<FONT face="Helvetica">
<big>Request Error (invalid_request)</big>
<BR>
<BR>
</FONT>
</TD></TR>
<TR><TD>
<FONT face="Helvetica">
Your request could not be processed. Request could not be handled
</FONT>
</TD></TR>
<TR><TD>
<FONT face="Helvetica">
This could be caused by a misconfiguration, or possibly a malformed request.
</FONT>
</TD></TR>
<TR><TD>
<FONT face="Helvetica" SIZE=2>
<BR>
For assistance, contact your network support team.
</FONT>
</TD></TR>
</TABLE>
</blockquote>
</FONT>
</BODY></HTML>
Is there somewhere wrong? Best regards.
Hi here's an example but it seems the problem should be how you receive parse the data
from typing import Dict, List, Tuple
import numpy as np
import soundfile as sf
from io import BytesIO
from ts.torch_handler.base_handler import BaseHandler
import torch
from model import Model
class SoundModelHandler(BaseHandler):
def __init__(self):
super(SoundModelHandler, self).__init__()
self.initialized = False
def preproc_one_wav(self, req: Dict[str, bytearray]) -> Tuple[np.ndarray, int]:
"""
Function to convert req data to image
:param req:
:return: np array of wav form
"""
wav_byte = req.get("data")
if wav_byte is None:
wav_byte = req.get('body')
# create a stream from the encoded image
wav, sr = sf.read(BytesIO(wav_byte))
return wav, sr
def initialize(self, context):
"""
Invoke by torchserve for loading a model
:param context: context contains model server system properties
:return:
"""
self.manifest = context.manifest
properties = context.system_properties
model_dir = properties.get("model_dir")
self._context = context
self.model = Model()
self.model.load_state_dict(torch.load(model_dir + 'weights.pt'))
self.initialize = True
self.device = torch.device(
"cuda:" + str(properties.get("gpu_id"))
if torch.cuda.is_available() and properties.get("gpu_id") is not None
else "cpu"
)
def preprocess(self, requests: List[Dict[str, bytearray]]):
"""
Function to prepare data from the model
:param requests:
:return: tensor of the processed shape specified by the model
"""
batch_crop = [self.preproc_one_wav(req) for req in requests]
# Do something here if you want return as torch tensor
# You can apply torch cat here as well
batch = torch.cat(batch_crop)
return batch
def inference(self, model_input: torch.Tensor):
"""
Given the data from .preprocess, perform inference using the model.
:param reqeuest:
:return: Logits or predictions given by the model
"""
with torch.no_grad():
generated_ids = self.model.generate(model_input.to(self.device))
return generated_ids
Btw heres an example you can do to make a request to your model in python
import json
import requests
sample = {'data': wav_in_byte}
results = requests.post('ip:port', data=json.dumps(sample))
# parse results
get_results = results.json()