The example from the official documentation is as follows:
import weaviate
import weaviate.classes as wvc
import os, json, requests
# Connect via weaviate.connect_to_local, forwarding the OpenAI and Cohere
# API keys (read from the environment) as request headers.
client = weaviate.connect_to_local(
    headers={
        header_name: os.environ[env_var]
        for header_name, env_var in (
            ("X-OpenAI-Api-Key", "OPENAI_API_KEY"),
            ("X-Cohere-Api-Key", "COHERE_API_KEY"),
        )
    },
)
# Define a new schema: one collection with two named vectors, each built by
# a different vectorizer from a different source property.
collection = client.collections.create(
    name="Named_Vector_Jeopardy_Collection",
    description="Jeopardy game show questions",
    vectorizer_config=[
        # Cohere vectorizer over the "question" property.
        wvc.config.Configure.NamedVectors.text2vec_cohere(
            name="jeopardy_questions_vector",
            source_properties=["question"],
            vectorize_collection_name=False,
        ),
        # OpenAI vectorizer over the "answer" property.
        wvc.config.Configure.NamedVectors.text2vec_openai(
            name="jeopardy_answers_vector",
            source_properties=["answer"],
            vectorize_collection_name=False,
        ),
    ],
    # All three properties are plain TEXT fields.
    properties=[
        wvc.config.Property(name=property_name, data_type=wvc.config.DataType.TEXT)
        for property_name in ("category", "question", "answer")
    ],
)
## CHECK VALUES - uncomment the next line to see the collection definition
# print(collection)
This assumes that all of my embeddings are obtained from vectorizer services.
What is a general solution?
Let us assume that we need to embed some text with two embeddings:
import cohere
# Cohere client used by get_embed_cohere; the masked string is a placeholder
# for a real API key.
coc = cohere.Client(api_key="**************")


def get_embed_cohere(texts):
    """Embed text with Cohere's ``large`` model.

    Args:
        texts: Either a list of inputs to embed in a single request, or a
            single input (anything that is not a list).

    Returns:
        The response's ``embeddings`` when ``texts`` is a list; otherwise
        the first (only) entry of ``embeddings`` for the single input.
    """
    model = "large"
    # isinstance (instead of an exact type comparison) also sends list
    # subclasses through the batch path.
    if isinstance(texts, list):
        return coc.embed(texts=texts, model=model).embeddings
    # A single input is wrapped in a list for the request and unwrapped
    # again from the response.
    return coc.embed(texts=[texts], model=model).embeddings[0]
import weaviate
import weaviate.classes as wvc
import os, json, requests
# Connect via weaviate.connect_to_local. The OpenAI key is read from the
# environment instead of being hard-coded, so the secret never lands in
# source control; it is forwarded as a request header.
client = weaviate.connect_to_local(
    headers={
        "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"],
    },
)
# Define a new schema with two named vectors:
#   - "cohere": no vectorizer configured (NamedVectors.none), so its vectors
#     must be supplied by the caller at insert time;
#   - "openai": built by the text2vec_openai vectorizer from the "text"
#     property.
# Properties are referenced through `wvc.config`, the alias this script
# actually imports (`wc` was never defined), and use the client's
# `skip_vectorization` keyword spelling.
collection = client.collections.create(
    name="named_vec_class",
    description="named_vec_test",
    vectorizer_config=[
        wvc.config.Configure.NamedVectors.none(name="cohere"),
        wvc.config.Configure.NamedVectors.text2vec_openai(
            name="openai",
            source_properties=["text"],
            vectorize_collection_name=False,
        ),
    ],
    properties=[
        wvc.config.Property(
            name="tokens",
            data_type=wvc.config.DataType.INT,
            vectorize_property_name=False,
        ),
        wvc.config.Property(
            name="text",
            data_type=wvc.config.DataType.TEXT,
            vectorize_property_name=False,
        ),
        wvc.config.Property(
            name="uid",
            data_type=wvc.config.DataType.UUID,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
        wvc.config.Property(
            name="parent_uid",
            data_type=wvc.config.DataType.UUID,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
        wvc.config.Property(
            name="author",
            data_type=wvc.config.DataType.TEXT,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
        wvc.config.Property(
            name="book_name",
            data_type=wvc.config.DataType.TEXT,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
        wvc.config.Property(
            name="creation_date",
            data_type=wvc.config.DataType.DATE,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
        wvc.config.Property(
            name="chunk_type",
            data_type=wvc.config.DataType.TEXT,
            skip_vectorization=True,
            vectorize_property_name=False,
        ),
    ],
)
How do I now ingest data into the collection created above?
Hello! Duda from Weaviate here :)
As you are using named vectors, you can specify the vector while adding your object, like so:
# Insert one object: `properties` carries the object's data, and `vector`
# maps a named vector ("cohere") to the embedding supplied by the caller.
collection.data.insert(
    properties={"text": "this is an example"},
    vector={"cohere": [1, 2, 3, 4]},
)
Because you didn't provide a vector for the openai
named vector, Weaviate will vectorize it for you.
Check our documentation here for more examples using other clients: https://weaviate.io/developers/weaviate/manage-data/create#create-an-object-with-named-vectors
Let me know if this helps! Thanks!