The following code returns an error:
pydantic_core.pydantic_core.ValidationError: 1 validation error for NodeWithScore node Input should be a valid dictionary or instance of BaseNode [type=model_type, input_value=TextNode(id='dcb36e46-7a...metadata_seperator='\n'), input_type=TextNode] For further information visit https://errors.pydantic.dev/2.9/v/model_type
import textwrap
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.legacy.vector_stores import PGVectorStore
from llama_index.llms.openai import OpenAI
from sqlalchemy import make_url
import os
import openai
# Get openAI api key by reading local .env file
openai.api_key = "my-api-key"
os.environ["OPENAI_API_KEY"] = openai.api_key
print("Connecting to new vector db...")
connection_string = "postgresql://user:user@localhost:5432"
db_name = "vector_db"
url = make_url(connection_string)
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="table_name",
    embed_dim=1536,  # openai embedding dimension
)
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("Give me a summary of the data with code AA10B")
print(textwrap.fill(str(response), 100))
My problem is that if, instead of using VectorStoreIndex.from_vector_store, I use VectorStoreIndex.from_documents and pass the document list directly, it works perfectly fine. So I'm not sure what I'm doing wrong here.
The code that works looks like the following:
import textwrap
from llama_index.core import StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.readers.database import DatabaseReader
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore
import openai
import os
from sqlalchemy import make_url, create_engine, text
import psycopg2
# Get openAI api key by reading local .env file
openai.api_key = "my-api-key"
os.environ["OPENAI_API_KEY"] = openai.api_key
engine = create_engine("postgresql+psycopg2://user:user@localhost/db")
reader = DatabaseReader(engine=engine)
query = """query"""
documents = reader.load_data(query=query)
# Recreate database if exists
conn = psycopg2.connect("postgres://user:user@localhost:5432/db")
conn.autocommit = True
cur = conn.cursor()
cur.execute("DROP DATABASE IF EXISTS vector_db;")
cur.execute("CREATE DATABASE vector_db;")
conn.close()
conn = psycopg2.connect("postgres://user:user@localhost:5432/vector_db")
conn.autocommit = True
cur = conn.cursor()
cur.execute("CREATE EXTENSION vector;")
conn.close()
connection_string = "postgresql://user:user@localhost:5432"
db_name = "vector_db"
url = make_url(connection_string)
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="table",
    embed_dim=1536,  # openai embedding dimension
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, show_progress=True
)
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("Give me a summary of the data with code AA10B")
print(textwrap.fill(str(response), 100))
Well, I found the error in my own script. The difference between one script and the other is the following parameter in the connection to the PGVectorStore:
hnsw_kwargs={
    "hnsw_m": 16,
    "hnsw_ef_construction": 64,
    "hnsw_ef_search": 40,
    "hnsw_dist_method": "vector_cosine_ops",
},
This needs to be there so that the query against the database uses the same format the data had when it was inserted (pretty obvious, isn't it?). When I first tried to add it, I was getting an error along the lines of "PGVectorStore.from_params doesn't have any hnsw_kwargs parameter", so apparently I had two different sets of functions with the same name. The problem was that I was using a different PGVectorStore package in each script: when inserting I was using the core import (llama_index.vector_stores.postgres), while when querying I was using the legacy one (llama_index.legacy.vector_stores), which doesn't accept these parameters. That's also why I was getting the pydantic error: the legacy store hands back its own TextNode class, which the core NodeWithScore model does not accept as a BaseNode.
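For completeness, here is a minimal sketch of what my querying script looks like after the fix (connection string, table name and question are the same placeholders as above); the key points are the core PGVectorStore import and passing the same hnsw_kwargs that were used when the index was built:

import textwrap
from llama_index.core import VectorStoreIndex
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.postgres import PGVectorStore  # core import, NOT llama_index.legacy
from sqlalchemy import make_url

connection_string = "postgresql://user:user@localhost:5432"
url = make_url(connection_string)

# The store configuration must match the one used at ingestion time:
# same table name, same embedding dimension and the same hnsw_kwargs.
vector_store = PGVectorStore.from_params(
    database="vector_db",
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name="table",
    embed_dim=1536,  # openai embedding dimension
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)

index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("Give me a summary of the data with code AA10B")
print(textwrap.fill(str(response), 100))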