I'm trying to create an embedding vector database from some .txt documents in a local folder. In particular, I'm following this tutorial from the official LangChain documentation: LangChain - Azure Cognitive Search and Azure OpenAI. I have followed all the steps of the tutorial, and this is my Python script:
# From https://python.langchain.com/docs/integrations/vectorstores/azuresearch
import openai
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
# Azure OpenAI connection settings, exported as environment variables.
os.environ.update(
    {
        "OPENAI_API_TYPE": "azure",
        "OPENAI_API_BASE": "https://xxxxxx.openai.azure.com",
        "OPENAI_API_KEY": "xxxxxxxxx",
        "OPENAI_API_VERSION": "2023-05-15",
    }
)

# Embedding deployment name and Azure Cognitive Search connection details.
model: str = "text-embedding-ada-002"
index_name: str = "cognitive-search-openai-exercise-index"
vector_store_address: str = "https://xxxxxxx.search.windows.net"
vector_store_password: str = "xxxxxxx"

embeddings: OpenAIEmbeddings = OpenAIEmbeddings(deployment=model, chunk_size=1)

# NOTE: constructing AzureSearch looks the index up and tries to create it
# when it is missing, so this call can fail if the installed
# azure-search-documents SDK does not match what LangChain expects.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
# Split the source file into chunks of up to 1000 characters with no overlap,
# then push the resulting chunks into the vector store.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
loader = TextLoader(
    "C:/Users/xxxxxxxx/azure_openai_cognitive_search_exercise/data/qna/a.txt",
    encoding="utf-8",
)
documents = loader.load()
docs = text_splitter.split_documents(documents)
vector_store.add_documents(documents=docs)
# Perform a similarity search and print the content of the best match.
docs = vector_store.similarity_search(
    query="Who is Pippo Franco?",
    k=3,
    search_type="similarity",
)
# The search can come back empty (for example when nothing has been indexed),
# in which case docs[0] would raise IndexError; report that case explicitly.
if docs:
    print(docs[0].page_content)
else:
    print("No matching documents found.")
Now, when I run the script I get the following error:
vector_search_configuration is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
algorithm_configurations is not a known attribute of class <class 'azure.search.documents.indexes._generated.models._models_py3.VectorSearch'> and will be ignored
Traceback (most recent call last):
File "C:\Users\xxxxxxxxx\venv\Lib\site-packages\langchain\vectorstores\azuresearch.py", line 105, in _get_search_client
index_client.get_index(name=index_name)
File "C:\Users\xxxxxxx\venv\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xxxxxxx\KYF\venv\Lib\site-packages\azure\search\documents\indexes\_search_index_client.py", line 145, in get_index
result = self._client.indexes.get(name, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xxxxxx\venv\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xxxxxx\KYF\venv\Lib\site-packages\azure\search\documents\indexes\_generated\operations\_indexes_operations.py",
line 864, in get
map_error(status_code=response.status_code, response=response, error_map=error_map)
File "C:\Users\xxxxxxxx\venv\Lib\site-packages\azure\core\exceptions.py", line 165, in map_error
raise error
azure.core.exceptions.ResourceNotFoundError: () No index with the name 'cognitive-search-openai-exercise-index' was found in the service 'cognitive-search-openai-exercise'.
Code:
Message: No index with the name 'cognitive-search-openai-exercise-index' was found in the service 'cognitive-search-openai-exercise'.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\xxxxxxx\venv\azure_openai_cognitive_search_exercise\test.py", line 25, in <module>
vector_store: AzureSearch = AzureSearch(
^^^^^^^^^^^^
File "C:\Users\xxxxxxx\venv\Lib\site-packages\langchain\vectorstores\azuresearch.py", line 237, in __init__
self.client = _get_search_client(
^^^^^^^^^^^^^^^^^^^
File "C:\Users\xxxxxxxx\venv\Lib\site-packages\langchain\vectorstores\azuresearch.py", line 172, in _get_search_client
index_client.create_index(index)
File "C:\Users\xxxxxxx\venv\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xxxxxxx\venv\Lib\site-packages\azure\search\documents\indexes\_search_index_client.py", line 220, in create_index
result = self._client.indexes.create(patched_index, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xxxxxxx\venv\Lib\site-packages\azure\core\tracing\decorator.py", line 78, in wrapper_use_tracer
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\xxxxxx\venv\Lib\site-packages\azure\search\documents\indexes\_generated\operations\_indexes_operations.py",
line 402, in create
raise HttpResponseError(response=response, model=error)
azure.core.exceptions.HttpResponseError: (InvalidRequestParameter) The request is invalid. Details: definition : The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set.
Code: InvalidRequestParameter
Message: The request is invalid. Details: definition : The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set.
Exception Details: (InvalidField) The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set. Parameters: definition
Code: InvalidField
Message: The vector field 'content_vector' must have the property 'vectorSearchConfiguration' set. Parameters: definition
I have created an index manually from the Azure Cognitive Search Console, but I don't think this is the correct approach, as the script should automatically create a new index.
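Please run `pip install azure-search-documents==11.4.0b8` to ensure you are using a version of the Azure Cognitive Search Python SDK that is compatible with LangChain. The warnings at the top of your output (`vector_search_configuration is not a known attribute ... and will be ignored`) show that the installed SDK does not recognize the fields LangChain sets, which is why the index-creation request is rejected.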