I have this code:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
# Toy training data: each entry is a (user message, bot reply) tuple.
conversations = [
("Hello", "Hi there!"),
("How are you?", "I'm doing well, thanks."),
("What's your name?", "I'm a chatbot."),
]
tokenizer = Tokenizer()
# NOTE: `conversations` is a list of tuples, not a list of strings. This is the
# call that raises "AttributeError: 'tuple' object has no attribute 'lower'"
# in the traceback quoted below, so nothing after this line is ever executed.
tokenizer.fit_on_texts(conversations)
# +1 leaves room for index 0, which is the value pad_sequences pads with.
vocab_size = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(conversations)
# Length of the longest tokenized entry; used as the fixed model input length.
max_sequence_len = max([len(seq) for seq in sequences])
# Intended to split the data into inputs (X) and targets (y); this assumes
# every entry of `sequences` is an (input, reply) pair -- TODO confirm once the
# tokenizer call above has been fixed.
X, y = zip(*sequences)
# Right-pad both sides with zeros to the same length.
X = pad_sequences(X, maxlen=max_sequence_len, padding='post')
y = pad_sequences(y, maxlen=max_sequence_len, padding='post')
# Embedding -> LSTM emitting one output per timestep -> softmax over the
# vocabulary, i.e. one predicted token id per input position.
model = Sequential([
Embedding(vocab_size, 64, input_length=max_sequence_len, mask_zero=True),
LSTM(100, return_sequences=True),
Dense(vocab_size, activation='softmax')
])
# Targets are integer token ids, hence the sparse cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=50, verbose=1)
def generate_response(input_text):
    """Return the model's predicted reply for ``input_text`` as one string.

    The text is tokenized with the module-level ``tokenizer``, right-padded to
    ``max_sequence_len``, passed through ``model``, and the highest-scoring
    token id at every position is converted back to words.
    """
    # Wrap the text in a list: the tokenizer and the model both work on batches.
    batch = pad_sequences(
        tokenizer.texts_to_sequences([input_text]),
        maxlen=max_sequence_len,
        padding='post',
    )
    scores = model.predict(batch)
    # Pick the most likely vocabulary index for each timestep.
    best_ids = tf.argmax(scores, axis=-1).numpy()
    # One decoded string per batch row; the batch holds a single row.
    return tokenizer.sequences_to_texts(best_ids)[0]
# Chat loop: read a line from the user, print the model's reply, repeat forever.
while True:
    response = generate_response(input(">>> "))
    print(f"Chatbot: {response}")
But I get this error:
Traceback (most recent call last):
File "C:\Users\mceca\Desktop\chatbot.py", line 14, in <module>
tokenizer.fit_on_texts(conversations)
File "C:\Users\mceca\AppData\Roaming\Python\Python310\site-packages\keras\preprocessing\text.py", line 293, in fit_on_texts
seq = text_to_word_sequence(
File "C:\Users\mceca\AppData\Roaming\Python\Python310\site-packages\keras\preprocessing\text.py", line 74, in text_to_word_sequence
input_text = input_text.lower()
AttributeError: 'tuple' object has no attribute 'lower'
I have installed tensorflow==2.12.0.
I don't want to change the TensorFlow version. My OS is Windows 10, and I don't have a GPU, only a CPU.
I work with Keras, but if you can suggest another way to do this in TensorFlow, please do.
How can I fix this error? If there is more than one solution, please list them all.
The error you're encountering occurs because the Tokenizer class from keras.preprocessing.text expects a list of strings (i.e., text samples) but is receiving a list of tuples instead.
Understanding the Error
The error message AttributeError: 'tuple' object has no attribute 'lower' appears because Tokenizer.fit_on_texts calls .lower() on each text sample, but each sample it receives here is a tuple rather than a string.
Solution: You need to adjust your code so that you pass a list of strings rather than a list of tuples. For your chatbot, separate the input texts and the response texts into two lists, and combine them only where needed (for example, when fitting the tokenizer so that both share one vocabulary).
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
# Prepare the conversation data: inputs[i] is the prompt answered by responses[i].
inputs = ["Hello", "How are you?", "What's your name?"]
responses = ["Hi there!", "I'm doing well, thanks.", "I'm a chatbot."]

# Fit ONE tokenizer on prompts and replies together so both sides share the
# same word -> index mapping. Tokenizer expects a flat list of strings.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(inputs + responses)

# +1 leaves room for index 0, which is the value pad_sequences pads with.
vocab_size = len(tokenizer.word_index) + 1

# Convert texts to sequences of word indices
input_sequences = tokenizer.texts_to_sequences(inputs)
response_sequences = tokenizer.texts_to_sequences(responses)

# Longest sequence on either side; prompts and replies are padded to this
# common length because the model emits one output token per input position.
max_sequence_len = max(len(seq) for seq in input_sequences + response_sequences)

# Right-pad with zeros
X = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='post')
y = pad_sequences(response_sequences, maxlen=max_sequence_len, padding='post')

# Define the model.
# mask_zero is deliberately NOT enabled on the Embedding: a mask derived from
# the prompt would propagate to the loss and drop every target position that
# lies beyond the prompt's length, so a reply longer than its prompt (e.g.
# "Hello" -> "Hi there!") could never be learned in full. Without the mask the
# network is trained on all positions, including emitting 0 where the reply
# has ended.
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_sequence_len),
    LSTM(100, return_sequences=True),
    Dense(vocab_size, activation='softmax'),
])
# Targets are integer token ids, hence the sparse cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fit the model
model.fit(X, y, epochs=50, verbose=1)
def generate_response(input_text, *, fallback="Sorry, I don't understand."):
    """Return the model's predicted reply for ``input_text``.

    Args:
        input_text: The user's message as a plain string.
        fallback: Reply returned when ``input_text`` contains no word known to
            the tokenizer (empty input, punctuation only, or unseen words).

    Returns:
        The decoded reply as a single space-joined string, or ``fallback``.
    """
    input_seq = tokenizer.texts_to_sequences([input_text])
    # Unknown words are dropped by the tokenizer. If nothing is left, the model
    # would receive an all-padding input and return an arbitrary prediction.
    if not input_seq[0]:
        return fallback

    padded_input = pad_sequences(input_seq, maxlen=max_sequence_len, padding='post')
    # verbose=0 keeps Keras' per-call progress bar out of the chat output.
    predicted_output = model.predict(padded_input, verbose=0)
    # predict() already returns a NumPy array: take the most likely token id at
    # each position of the single batch row.
    predicted_word_index = predicted_output.argmax(axis=-1)[0]
    # Index 0 (padding) has no word attached and is skipped when decoding.
    response = tokenizer.sequences_to_texts([predicted_word_index.tolist()])
    return response[0]
# Interaction loop: read a message, print the model's reply, repeat until the
# user ends the session.
while True:
    try:
        user_input = input(">>> ")
    except (EOFError, KeyboardInterrupt):
        # End of input (Ctrl+Z then Enter on Windows, Ctrl+D elsewhere) or
        # Ctrl+C: leave the loop cleanly instead of crashing with a traceback.
        print()
        break
    response = generate_response(user_input)
    print(f"Chatbot: {response}")