Either I've set this up wrong or I'm misunderstanding the vector database.
I have the phrase "What's the best bet on the table". I embed it to get the vector, then save it to Firestore as a vector. My intention is to match sentences like "What's the BEST bet on the table" with a phrase like "What's the TOP bet on the table".
Here is my save code:
require('dotenv').config({ path: __dirname + '/.env' })
const { initializeApp, cert } = require('firebase-admin/app');
const { getFirestore, FieldValue } = require('firebase-admin/firestore');
const serviceAccount = require('./secrets/crapsai-72b89-firebase-adminsdk-pnqva-e321c8eecb.json');
const { gemini15Flash, googleAI } = require('@genkit-ai/googleai');
const { textEmbeddingGecko001 } = require('@genkit-ai/googleai');
const { genkit } = require('genkit');
// Genkit instance for the save script. The Google AI plugin is authenticated
// with the GOOGLE_GENAI_API_KEY environment variable, and gemini15Flash is
// passed as the model.
const googleAIPlugin = googleAI({ apiKey: process.env.GOOGLE_GENAI_API_KEY });
const ai = genkit({
  plugins: [googleAIPlugin],
  model: gemini15Flash,
});
// Initialise the Firebase Admin SDK from the service-account JSON and obtain
// the Firestore handle the script writes to.
const credential = cert(serviceAccount);
const app = initializeApp({ credential });
const firestore = getFirestore(app);
// The phrase to embed and store.
const question = "What's the best bet on the table";

/**
 * Embeds `question`, stores it in the "questions" collection together with
 * its vector, then terminates the process (exit code 0 on success, 1 on error).
 */
const saveQuestion = async () => {
  let exitCode = 0;
  try {
    // The embedder result is handed straight to FieldValue.vector, so it is
    // assumed to be a plain array of numbers — TODO confirm against the
    // installed Genkit version.
    const embedding = await ai.embed({
      embedder: textEmbeddingGecko001,
      content: question,
    });
    console.log("Got the embeddings");
    console.log(embedding);

    const record = {
      "question": question,
      "embedding": FieldValue.vector(embedding),
      "confirmed": true,
    };
    await firestore.collection("questions").add(record);
    console.log("DONE");
  } catch (e) {
    console.log("Error", e);
    exitCode = 1;
  }
  // Exit explicitly rather than waiting for the event loop to drain.
  process.exit(exitCode);
};

saveQuestion();
Here is what that looks like in Firestore:
Here is the retrieval code:
require('dotenv').config({ path: __dirname + '/.env' })
const { initializeApp, cert } = require('firebase-admin/app');
const { getFirestore } = require('firebase-admin/firestore');
const serviceAccount = require('./secrets/crapsai-72b89-firebase-adminsdk-pnqva-e321c8eecb.json');
const { gemini15Flash, googleAI, textEmbeddingGecko001 } = require('@genkit-ai/googleai');
const { defineFirestoreRetriever } = require('@genkit-ai/firebase');
const { genkit } = require('genkit');
// Genkit instance for the retrieval script. The Google AI plugin is
// authenticated with the GOOGLE_GENAI_API_KEY environment variable, and
// gemini15Flash is passed as the model.
const googleAIPlugin = googleAI({ apiKey: process.env.GOOGLE_GENAI_API_KEY });
const ai = genkit({
  plugins: [googleAIPlugin],
  model: gemini15Flash,
});
// Initialise the Firebase Admin SDK from the service-account JSON and obtain
// the Firestore handle the retriever queries.
const credential = cert(serviceAccount);
const app = initializeApp({ credential });
const firestore = getFirestore(app);
// Retriever over the "questions" collection: the query text is embedded with
// textEmbeddingGecko001 and compared against the stored `embedding` field
// using cosine distance; the `question` field is returned as the content.
const retrieverConfig = {
  name: 'questionRetriever',
  firestore,
  collection: 'questions',
  contentField: 'question',
  vectorField: 'embedding',
  embedder: textEmbeddingGecko001,
  distanceMeasure: 'COSINE',
};
const retriever = defineFirestoreRetriever(ai, retrieverConfig);
// Query text; deliberately unrelated to the stored phrase.
const question = "foo";

/**
 * Runs the retriever for `question`, prints every returned document, then
 * terminates the process (exit code 0 on success, 1 on error).
 */
const retrieveQuestions = async () => {
  let exitCode = 0;
  try {
    // Ask for up to 5 results, restricted to documents with confirmed === true.
    const options = {
      limit: 5,
      where: { confirmed: true },
    };
    const docs = await ai.retrieve({
      retriever,
      query: question,
      options,
    });
    for (const doc of docs) {
      console.log(doc);
    }
    console.log("DONE");
  } catch (e) {
    console.log("Error", e);
    exitCode = 1;
  }
  // Exit explicitly rather than waiting for the event loop to drain.
  process.exit(exitCode);
};

retrieveQuestions();
Then, when I run the retrieval code with the question 'foo', expecting NOT to get a match, I get a match.
So either I've set this up wrong or I'm misunderstanding the vector db.
SOLVED: So it's ranking them. I had so little data that it was ranking the unlikely match highest. When I added more data I could see that it's working; it just returns something no matter what.
The code above in the post worked for me.