weaviate

How do I use Weaviate named vectors?


The example from the official documentation is as follows:

import weaviate
import weaviate.classes as wvc
import os, json, requests

# Provider API keys are read from the environment and passed to the local
# Weaviate connection as additional headers.
api_key_headers = {
    "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"],
    "X-Cohere-Api-Key": os.environ["COHERE_API_KEY"],
}
client = weaviate.connect_to_local(headers=api_key_headers)

named_vectors = wvc.config.Configure.NamedVectors

# Named vector computed from the "question" property with text2vec-cohere.
questions_vector = named_vectors.text2vec_cohere(
    name="jeopardy_questions_vector",
    source_properties=["question"],
    vectorize_collection_name=False,
)

# Named vector computed from the "answer" property with text2vec-openai.
answers_vector = named_vectors.text2vec_openai(
    name="jeopardy_answers_vector",
    source_properties=["answer"],
    vectorize_collection_name=False,
)

# All three properties are plain TEXT fields.
text_properties = [
    wvc.config.Property(name=prop_name, data_type=wvc.config.DataType.TEXT)
    for prop_name in ("category", "question", "answer")
]

# Define a new schema
collection = client.collections.create(
    name="Named_Vector_Jeopardy_Collection",
    description="Jeopardy game show questions",
    vectorizer_config=[questions_vector, answers_vector],
    properties=text_properties,
)

## CHECK VALUES - uncomment the next line to see the collection definition
# print(collection)

This assumes that all of my embeddings are generated by external vectorizer services.

What is a general solution?

Let us assume that we need to embed some text with two embeddings:

  1. With the OpenAI API
  2. By supplying my own vectors
import cohere
coc = cohere.Client(api_key = "**************")
def get_embed_cohere(texts):
    """Embed text with the module-level Cohere client ``coc``.

    Args:
        texts: Either a list of texts to embed in one request, or a single
            text, which is wrapped in a one-element list before the request.

    Returns:
        For a list input, the ``embeddings`` attribute of the Cohere
        response. For any other input, the first (and only) entry of that
        attribute.
    """
    model = "large"
    # isinstance rather than an exact type comparison, so that list
    # subclasses are also sent as a batch instead of being wrapped as one text.
    if isinstance(texts, list):
        return coc.embed(texts=texts, model=model).embeddings
    return coc.embed(texts=[texts], model=model).embeddings[0]

import weaviate
import weaviate.classes as wvc
import os, json, requests

# Only the OpenAI key is needed: the "cohere" named vector is supplied by the
# caller at insert time, so Weaviate never calls Cohere itself.
client = weaviate.connect_to_local(
    headers={
        "X-OpenAI-Api-Key": "******************"
    }
)

# Define a new schema
collection = client.collections.create(
    name="named_vec_class",
    description="named_vec_test",
    vectorizer_config=[
        # No vectorizer: vectors for "cohere" must be provided with each object.
        wvc.config.Configure.NamedVectors.none(name="cohere"),
        # "openai" is computed by Weaviate from the "text" property only.
        wvc.config.Configure.NamedVectors.text2vec_openai(
            name="openai",
            source_properties=["text"],
            vectorize_collection_name=False,
        ),
    ],
    # Property and DataType are reached through the imported `wvc` alias
    # (`wvc.config`); the keyword is spelled `skip_vectorization`.
    properties=[
        wvc.config.Property(name="tokens", data_type=wvc.config.DataType.INT, vectorize_property_name=False),
        wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT, vectorize_property_name=False),
        wvc.config.Property(name="uid", data_type=wvc.config.DataType.UUID, skip_vectorization=True, vectorize_property_name=False),
        wvc.config.Property(name="parent_uid", data_type=wvc.config.DataType.UUID, skip_vectorization=True, vectorize_property_name=False),
        wvc.config.Property(name="author", data_type=wvc.config.DataType.TEXT, skip_vectorization=True, vectorize_property_name=False),
        wvc.config.Property(name="book_name", data_type=wvc.config.DataType.TEXT, skip_vectorization=True, vectorize_property_name=False),
        wvc.config.Property(name="creation_date", data_type=wvc.config.DataType.DATE, skip_vectorization=True, vectorize_property_name=False),
        wvc.config.Property(name="chunk_type", data_type=wvc.config.DataType.TEXT, skip_vectorization=True, vectorize_property_name=False),
    ],
)

How do I now ingest data in the collection created above?


Solution

  • Hello! Duda from Weaviate here :)

    As you are using named vectors, you can specify the vector while adding your object, like so:

    # Property values of the object to store.
    new_object = {"text": "this is an example"}
    # Vectors keyed by named-vector name; only "cohere" is supplied here.
    own_vectors = {"cohere": [1, 2, 3, 4]}
    collection.data.insert(new_object, vector=own_vectors)
    

    Because you didn't provide a vector for the `openai` named vector, Weaviate will vectorize it for you.

    Check our documentation here for more example using other clients: https://weaviate.io/developers/weaviate/manage-data/create#create-an-object-with-named-vectors

    Let me know if this helps! Thanks!