I deployed llama2-chat-13b from Model Garden. However, I am getting an error while trying to perform inference.
Configuration:
project="X";
endpoint_id="Y";
location="us-east1";
64 VCPUs, 57.6 GB RAM;
GPU= 4 T4;
I tried three approaches, but each of them returns an error:
Approach 1:
from typing import Dict, List, Union
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
def predict_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str = "us-east1",
    api_endpoint: Union[str, None] = None,
):
    """Send an online prediction request to an endpoint and print the response.

    `instances` can be either single instance of type dict or a list
    of instances.

    Args:
        project: Project used to build the endpoint resource path.
        endpoint_id: ID of the endpoint to query.
        instances: One instance dict, or a list of instance dicts.
        location: Region used to build the endpoint resource path.
        api_endpoint: Host of the regional API endpoint. When omitted it is
            derived from `location`, so the host and the resource path
            always refer to the same region.
    """
    # The AI Platform services require regional API endpoints; deriving the
    # host from `location` avoids a host/resource-path region mismatch.
    if api_endpoint is None:
        api_endpoint = f"{location}-aiplatform.googleapis.com"
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
    # The format of each instance should conform to the deployed model's prediction input schema.
    instances = instances if isinstance(instances, list) else [instances]
    instances = [
        json_format.ParseDict(instance_dict, Value()) for instance_dict in instances
    ]
    # No prediction parameters are sent; an empty Value is passed instead.
    parameters = json_format.ParseDict({}, Value())
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instances, parameters=parameters
    )
    print("response")
    print(" deployed_model_id:", response.deployed_model_id)
    # The predictions are a google.protobuf.Value representation of the model's predictions.
    for prediction in response.predictions:
        # dict() only accepts mapping-like predictions; anything else
        # (e.g. a plain string) is printed as-is instead of raising.
        printable = dict(prediction) if hasattr(prediction, "keys") else prediction
        print(" prediction:", printable)
# [END aiplatform_predict_custom_trained_model_sample]
# Approach 1 invocation: a single instance whose "prompt" is a one-element list.
predict_custom_trained_model_sample(
    instances=[{"prompt": ["hello"]}],
    project="",
    endpoint_id="",
    location="us-east1",
)
I get the following error:
_InactiveRpcError Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/google/api_core/grpc_helpers.py in error_remapped_callable(*args, **kwargs)
71 try:
---> 72 return callable_(*args, **kwargs)
73 except grpc.RpcError as exc:
6 frames
_InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.INTERNAL
details = "Internal Server Error"
debug_error_string = "UNKNOWN:Error received from peer ipv4:172.217.15.234:443 {created_time:"2023-10-31T20:13:00.233826088+00:00", grpc_status:13, grpc_message:"Internal Server Error"}"
>
The above exception was the direct cause of the following exception:
InternalServerError Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/google/api_core/grpc_helpers.py in error_remapped_callable(*args, **kwargs)
72 return callable_(*args, **kwargs)
73 except grpc.RpcError as exc:
---> 74 raise exceptions.from_grpc_error(exc) from exc
75
76 return error_remapped_callable
InternalServerError: 500 Internal Server Error
Approach 2:
import vertexai
from vertexai.language_models import TextGenerationModel
# Point the Vertex AI SDK at the project and region.
vertexai.init(location="us-east1", project="X")
# Generation settings; nothing in this snippet passes them to a call.
parameters = dict(
    candidate_count=1,
    max_output_tokens=1024,
    temperature=0.2,
    top_p=0.8,
    top_k=40,
)
# Request the text-generation model by name.
model = TextGenerationModel.from_pretrained("Llama2-13B-chat-001")
I get the following error:
_InactiveRpcError Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/google/api_core/grpc_helpers.py in error_remapped_callable(*args, **kwargs)
71 try:
---> 72 return callable_(*args, **kwargs)
73 except grpc.RpcError as exc:
11 frames
_InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.INVALID_ARGUMENT
details = "Invalid publisher model resource format."
debug_error_string = "UNKNOWN:Error received from peer ipv4:172.217.0.74:443 {created_time:"2023-10-31T20:23:51.049594923+00:00", grpc_status:3, grpc_message:"Invalid publisher model resource format."}"
>
The above exception was the direct cause of the following exception:
InvalidArgument Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/google/api_core/grpc_helpers.py in error_remapped_callable(*args, **kwargs)
72 return callable_(*args, **kwargs)
73 except grpc.RpcError as exc:
---> 74 raise exceptions.from_grpc_error(exc) from exc
75
76 return error_remapped_callable
InvalidArgument: 400 Invalid publisher model resource format.
Finally, I tried with langchain. Approach 3:
from langchain.llms.vertexai import VertexAIModelGarden, VertexAI
# Wrap the deployed endpoint in LangChain's Model Garden LLM class.
llm = VertexAIModelGarden(location="us-east1", endpoint_id="Y", project="X")
# Send a single prompt through the wrapper.
llm("hello")
I get the following error:
TypeError: string indices must be integers
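The following works for me. Note that, unlike in Approach 1, `prompt` is passed as a plain string rather than a list, and the API endpoint is derived from `location`.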
from typing import Dict, List, Union
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
def predict_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str = "us-central1",
):
    """Query an endpoint for online predictions and print what comes back.

    Args:
        project: Project used to build the endpoint resource path.
        endpoint_id: ID of the endpoint to query.
        instances: A single instance dict, or a list of instance dicts.
        location: Region; used for both the API host and the resource path.
    """
    # The AI Platform services require regional API endpoints, so the host
    # is built from the requested location.
    regional_host = f"{location}-aiplatform.googleapis.com"
    # The client only needs to be created once and can be reused for
    # multiple requests.
    prediction_client = aiplatform.gapic.PredictionServiceClient(
        client_options={"api_endpoint": regional_host}
    )

    # Accept a lone dict by wrapping it; each instance should conform to the
    # deployed model's prediction input schema.
    raw_instances = instances if isinstance(instances, list) else [instances]
    instance_values = []
    for raw_instance in raw_instances:
        instance_values.append(json_format.ParseDict(raw_instance, Value()))

    # No prediction parameters are sent; an empty Value is passed instead.
    empty_parameters = json_format.ParseDict({}, Value())

    endpoint_name = prediction_client.endpoint_path(
        project=project,
        location=location,
        endpoint=endpoint_id,
    )
    response = prediction_client.predict(
        endpoint=endpoint_name,
        instances=instance_values,
        parameters=empty_parameters,
    )

    print("Response")
    print("Deployed Model ID:", response.deployed_model_id)
    # The predictions are a google.protobuf.Value representation of the
    # model's predictions.
    for prediction in response.predictions:
        print("prediction:", prediction)
# Prompt text sent to the deployed model.
query = "Who is Albert Einstein?"
# One instance carrying the prompt as a plain string plus a temperature value.
predict_custom_trained_model_sample(
    instances=[{"prompt": query, "temperature": 0.0}],
    project="X",
    endpoint_id="Y",
    location="us-east1",
)