pythonpandasnumpycluster-analysismean-shift

'numpy.float64' object is not iterable - meanshift clustering


python newbie here. I am trying to run this code but I get the error message that the object is not iterable. Would appreciate some advice on what I am doing wrong. Thanks.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

temp = pd.read_csv("file.csv", encoding='latin-1')
xy = temp.ix[:,2:6]
X = xy.values

X
array([[             nan,              nan],
   [             nan,              nan],
   [  3.92144000e+00,              nan],
   [  4.42382000e+00,              nan],
   [  4.18931000e+00,   5.61562775e+02],
   [             nan,              nan],
   [  4.33025000e+00,   6.73123391e+02],
   [  6.43775000e+00,              nan],
   [  3.12299000e+00,   2.21886627e+03],
   [             nan,              nan],
   [             nan,              nan]])

from itertools import cycle
colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')

class Mean_Shift:
    def __init__(self, radius=4):
        self.radius = radius

    def fit(self, data):
        centroids = {}

        for i in range(len(data)):
            centroids[i] = data[i]

        while True:
            new_centroids = []
            for i in centroids:
                in_bandwidth = []
                centroid = centroids[i]
                for featureset in data:
                    if np.linalg.norm(featureset-centroid) < self.radius:
                        in_bandwidth.append(featureset)

                new_centroid = np.average(in_bandwidth, axis=0)
                new_centroids.append(tuple(new_centroid))

            uniques = sorted(list(set(new_centroids)))

            prev_centroids = dict(centroids)

            centroids = {}
            for i in range(len(uniques)):
                centroids[i] = np.array(uniques[i])

            optimized = True

            for i in centroids:
                if not np.array_equal(centroids[i], prev_centroids[i]):
                    optimized = False

                if not optimized:
                    break

            if optimized:
                break

        self.centroids = centroids

    def predict(self,data):
        pass 


clf = Mean_Shift()
clf.fit(X)

centroids = clf.centroids

plt.scatter(X[:,0],X[:,1],s=50)

for c in centroids:
plt.scatter(centroids[c][0], centroids[c][1], color = 'k', marker='*', s=150)

plt.show()

Here is the error code i get:

/Users/carla/anaconda/lib/python3.5/site-packages/numpy/core/_methods.py:59: RuntimeWarning: Mean of empty slice.
  warnings.warn("Mean of empty slice.", RuntimeWarning)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-11-e13932b6e72d> in <module>()
 50 
 51 clf = Mean_Shift()
---> 52 clf.fit(X)
 53 
 54 centroids = clf.centroids

<ipython-input-11-e13932b6e72d> in fit(self, data)
 22 
 23                 new_centroid = np.average(in_bandwidth, axis=0)
---> 24                 new_centroids.append(tuple(new_centroid))
 25 
 26             uniques = sorted(list(set(new_centroids)))

TypeError: 'numpy.float64' object is not iterable

Solution

  • new_centroid = np.average(in_bandwidth, axis=0)
    

    Is assigning a scalar to new_centroid then you are trying to tuple(scalar) which is throwing the error.

    tuple(2.)
    
    ---------------------------------------------------------------------------
    TypeError                                 Traceback (most recent call last)
    <ipython-input-51-4406f9e676cf> in <module>()
    ----> 1 tuple(2.)
    
    TypeError: 'float' object is not iterable