When attempting to generate JSONL data using LlamaIndex, the process works well until the final step, where the results are saved to a JSONL file. However, every time I try to save the data, the file ends up empty and I receive the message "Wrote 0 examples to finetuning_events.jsonl". I am unsure of the reason behind this issue.
Wrote 0 examples to ./dataset_data/finetuning_events.jsonl
My code:
def jsonl_generation(self):
    """
    Generate a JSONL file of fine-tuning events.

    Reads the training questions from
    ``{self.dataset_path}/train_questions.txt`` (one question per line),
    answers each one through a vector-store index backed by ``self.llm``,
    and writes the fine-tuning events captured by the OpenAI handler to
    ``{self.dataset_path}/finetuning_events.jsonl``.
    """
    # llama_index is already a dependency of this module (see the
    # VectorStoreIndex import elsewhere in the file).
    from llama_index.core import Settings

    # Initialize OpenAI FineTuningHandler and CallbackManager.
    finetuning_handler = OpenAIFineTuningHandler()
    callback_manager = CallbackManager([finetuning_handler])

    # BUG FIX: assigning the manager to an already-constructed LLM
    # (self.llm.callback_manager = ...) does not register the handler with
    # the components that actually emit LLM events, so nothing was captured
    # and the JSONL file came out with 0 examples. Registering it globally
    # via Settings ensures every LLM call is recorded.
    Settings.callback_manager = callback_manager

    # Load the fine-tuning questions, one per line.
    questions = []
    with open(f'{self.dataset_path}/train_questions.txt', "r", encoding='utf-8') as f:
        for line in f:
            questions.append(line.strip())

    try:
        # Answer each question; the handler records every LLM call as a
        # fine-tuning event as a side effect.
        index = VectorStoreIndex.from_documents(
            self.documents
        )
        query_engine = index.as_query_engine(similarity_top_k=2, llm=self.llm)
        for question in questions:
            query_engine.query(question)
    except Exception as e:
        # Best-effort: report the failure but still flush whatever events
        # were captured before it.
        print(f"An error occurred: {e}")
    finally:
        # Persist the captured events even if a query failed part-way.
        finetuning_handler.save_finetuning_events(f'{self.dataset_path}/finetuning_events.jsonl')
I just solved the problem — here is my solution. It now correctly stores the dataset in the JSONL file.
def jsonl_generation(self):
    """
    Generate a JSONL file of fine-tuning events.

    Reads the training questions from
    ``{self.dataset_path}/train_questions.txt`` (one question per line),
    answers each one with a GPT-4-backed query engine, and writes the
    fine-tuning events captured by the OpenAI handler to
    ``{self.dataset_path}/finetuning_events.jsonl``.
    """
    from llama_index.core import VectorStoreIndex

    # Initialize OpenAI FineTuningHandler and CallbackManager.
    finetuning_handler = OpenAIFineTuningHandler()
    callback_manager = CallbackManager([finetuning_handler])
    llm = OpenAI(model="gpt-4", temperature=0.3)

    # Register the callback manager globally so the handler sees every LLM
    # call. (The original `Settings.callback_manager, = (callback_manager,)`
    # was a needless single-element tuple unpack — a plain assignment is
    # equivalent and clearer.)
    Settings.callback_manager = callback_manager

    # Load the fine-tuning questions, one per line.
    questions = []
    with open(f'{self.dataset_path}/train_questions.txt', "r", encoding='utf-8') as f:
        for line in f:
            questions.append(line.strip())

    try:
        # Answer each question with GPT-4; the handler records every LLM
        # call as a fine-tuning event as a side effect.
        index = VectorStoreIndex.from_documents(
            self.documents
        )
        query_engine = index.as_query_engine(similarity_top_k=2, llm=llm)
        for question in questions:
            query_engine.query(question)
    except Exception as e:
        # Best-effort: report the failure but still flush whatever events
        # were captured before it.
        print(f"An error occurred: {e}")
    finally:
        # Persist the captured events even if a query failed part-way.
        finetuning_handler.save_finetuning_events(f'{self.dataset_path}/finetuning_events.jsonl')