python pandas dataframe text-mining textblob

TextBlob error: too many values to unpack


I am trying to run the following code, but I get an error saying that there are too many values to unpack.

The code is:

import csv
import json
import pandas as pd

df = pd.read_csv("job/my_data_frame_test.csv", encoding="utf-8")

df.info()
print(df)
TEXT text recommended
ABC yes
DEF no
from textblob import TextBlob
    
from textblob.classifiers import NaiveBayesClassifier
    
cl = NaiveBayesClassifier(df)

After running this code, I get the following error (in full):

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-3d683b8c482a> in <module>
----> 1 cl = NaiveBayesClassifier(df)

/usr/local/lib/python3.8/dist-packages/textblob/classifiers.py in __init__(self, train_set, feature_extractor, format, **kwargs)
    203     def __init__(self, train_set,
    204                  feature_extractor=basic_extractor, format=None, **kwargs):
--> 205         super(NLTKClassifier, self).__init__(train_set, feature_extractor, format, **kwargs)
    206         self.train_features = [(self.extract_features(d), c) for d, c in self.train_set]
    207 

/usr/local/lib/python3.8/dist-packages/textblob/classifiers.py in __init__(self, train_set, feature_extractor, format, **kwargs)
    137         else:  # train_set is a list of tuples
    138             self.train_set = train_set
--> 139         self._word_set = _get_words_from_dataset(self.train_set)  # Keep a hidden set of unique words.
    140         self.train_features = None
    141 

/usr/local/lib/python3.8/dist-packages/textblob/classifiers.py in _get_words_from_dataset(dataset)
     61             return words
     62     all_words = chain.from_iterable(tokenize(words) for words, _ in dataset)
---> 63     return set(all_words)
     64 
     65 def _get_document_tokens(document):

/usr/local/lib/python3.8/dist-packages/textblob/classifiers.py in <genexpr>(.0)
     60         else:
     61             return words
---> 62     all_words = chain.from_iterable(tokenize(words) for words, _ in dataset)
     63     return set(all_words)
     64 

ValueError: too many values to unpack (expected 2)

Solution

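  • NaiveBayesClassifier() expects a list of tuples of the form (text, label). Iterating over a DataFrame yields its column labels rather than its rows, so unpacking a label such as 'TEXT' into two names fails with "too many values to unpack". Build the list of tuples from the two columns instead: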

    train = list(zip(df['TEXT'], df['text recommended']))
    # [('ABC', 'yes'), ('DEF', 'no')]
    
    cl = NaiveBayesClassifier(train)
    # <NaiveBayesClassifier trained on 2 instances>