python ordereddictionary tensorflow-federated federated-learning ordereddict

TypeError: expected at most 1 arguments, got 2 — raised by `data = collections.OrderedDict('data', distributed_data[i])`


I am getting the following error from this function definition. What is wrong?

`convert_to_client_data()` is a function in my federated learning code, where I am trying to convert a dataset into a federated dataset.

Here is the declaration of the class `Distribute`, which is used in the function that gives the error:

#Declaration of Class Distribute

def partition_list(list_in, n):
    """Shuffle ``list_in`` in place and deal its items into ``n`` groups.

    After the shuffle, group ``k`` receives every ``n``-th item starting at
    position ``k``, so group sizes differ by at most one.

    Args:
        list_in: Mutable sequence to partition; it is shuffled in place.
        n: Number of groups to produce.

    Returns:
        A list with ``n`` slices of the shuffled ``list_in``.
    """
    random.shuffle(list_in)
    groups = []
    for offset in range(n):
        groups.append(list_in[offset::n])
    return groups

class Distribute:
    """Split a dataset into per-client groups and into train/test subsets.

    ``distribute_data`` partitions ``self.data`` into client groups, either
    IID (seeded shuffle, equal-sized slices) or non-IID (per data type), and
    ``split_data`` produces a train/test split per sample or per user.
    """

    def __init__(self, data, data_type):
        """Store the dataset and set the default distribution/split options.

        Args:
            data: Collection of samples; ``len()`` and slicing are used on it.
            data_type: Name of the data kind; stored lower-cased.
        """
        self.data = data
        self.data_type = data_type.lower()
        # Optional per-user labels consumed by ``_user_split``. Nothing in
        # this class fills it in; callers may assign it after construction.
        self.label = None
        self.selected_feature = -1
        self.type = 'iid'
        self.client_no = 10
        self.data_sample_fraction = 0.1
        self.min_user_number = 10
        self.max_user_number = 20
        self.train_data_fraction = 0.9
        self.random_sampling_seed = 4
        self.random_split_seed = 1
        self.split_type = 'sample'

    def __shuffle(self, data, label=None):
        """Shuffle ``data`` in place using the fixed sampling seed.

        ``label`` is accepted only for backward compatibility and is not
        used. It is optional because every call site in this class passes
        the data alone, which previously raised ``TypeError``.
        """
        random.Random(self.random_sampling_seed).shuffle(data)

    def __equal_groups(self, group_count):
        """Return ``group_count`` consecutive, equal-sized slices of the data.

        Items left over after the integer division are not assigned to any
        group.
        """
        group_size = len(self.data) // group_count
        return [self.data[group_size * i: group_size * (i + 1)]
                for i in range(group_count)]

    def _iid_no_clint(self):
        """Shuffle the data and cut it into a randomly chosen number of groups.

        The group count is drawn from ``range(2, len(self.data))``, so the
        data must hold more than two items.
        """
        size = random.randrange(2, len(self.data))
        self.__shuffle(self.data)
        return self.__equal_groups(size)

    def _iid_clint(self, number_of_clients):
        """Shuffle the data and cut it into ``number_of_clients`` groups."""
        self.__shuffle(self.data)
        return self.__equal_groups(number_of_clients)

    def _iid(self, **kwargs):
        """Distribute IID, honouring an optional ``number_of_clients`` kwarg."""
        number_of_clients = kwargs.get('number_of_clients')
        if number_of_clients:
            return self._iid_clint(number_of_clients)
        return self._iid_no_clint()

    def _niid(self, **kwargs):
        """Distribute the data non-IID according to its data type.

        Keyword Args:
            data_type: ``'image'``, ``'text'`` or ``'csv'``; defaults to the
                type given at construction.
            number_of_clients: Exact number of groups. When omitted (or 0) a
                random count between ``min_user_number`` and
                ``max_user_number`` is used instead.
            min_user_number: Lower bound for the random group count.
            max_user_number: Upper bound for the random group count.

        Returns:
            A list of data groups, one per client.

        Raises:
            ValueError: If ``data_type`` is not supported, or if more clients
                are requested than there are data items.
        """
        min_user_number = kwargs.get('min_user_number', self.min_user_number)
        max_user_number = kwargs.get('max_user_number', self.max_user_number)
        number_of_clients = kwargs.get('number_of_clients')
        # Fall back to the type given at construction instead of failing
        # whenever the caller does not repeat it.
        data_type = kwargs.get('data_type', self.data_type)

        # (fixed-client-count selector, random-client-count selector)
        selectors = {
            'image': (self.__select_feature_image_client,
                      self.__select_feature_image_no_client),
            'text': (self.__select_feature_text_client,
                     self.__select_feature_text_no_client),
            'csv': (self.__select_feature_csv_client,
                    self.__select_feature_csv_no_client),
        }
        if data_type not in selectors:
            raise ValueError(
                f'Given data type: "{data_type}" is not correct, '
                'choose between options "text", "image" or "csv".')
        with_clients, without_clients = selectors[data_type]

        if not number_of_clients:
            return without_clients(min_user_number, max_user_number)
        if number_of_clients > len(self.data):
            raise ValueError(
                f'Total number of data: {len(self.data)} is less than total '
                f'number of clients specified: {number_of_clients}')
        return with_clients(number_of_clients)

    def distribute_data(self, **kwargs):
        """Partition the data into client groups.

        Keyword Args:
            dist_type: ``'iid'`` selects the IID strategy; any other value
                selects the non-IID one. Defaults to ``self.type``.
            **kwargs: Forwarded unchanged to the chosen strategy.

        Returns:
            A list of data groups, one per client.
        """
        if kwargs.get('dist_type', self.type) == 'iid':
            return self._iid(**kwargs)
        return self._niid(**kwargs)

    def __select_feature_image_no_client(self, min_user_number, max_user_number):
        """Partition image data into a random number of groups."""
        client_size = random.randint(min_user_number, max_user_number)
        return partition_list(self.data, client_size)

    def __select_feature_image_client(self, number_of_clients):
        """Partition image data into ``number_of_clients`` groups."""
        return np.array_split(self.data, number_of_clients)

    def __select_feature_text_no_client(self, min_user_number, max_user_number):
        """Partition text data into a random number of groups."""
        client_size = random.randint(min_user_number, max_user_number)
        return partition_list(self.data, client_size)

    def __select_feature_text_client(self, number_of_clients):
        """Partition text data into ``number_of_clients`` groups."""
        return np.array_split(self.data, number_of_clients)

    def __select_feature_csv_no_client(self, min_user_number, max_user_number):
        """Partition csv data into a random number of groups."""
        client_size = random.randint(min_user_number, max_user_number)
        return partition_list(self.data, client_size)

    def __select_feature_csv_client(self, number_of_clients):
        """Partition csv data into ``number_of_clients`` groups."""
        return np.array_split(self.data, number_of_clients)

    def split_data(self, x, y, **kwargs):
        """Split into train and test parts, per sample or per user.

        Args:
            x: Samples, used by the sample split.
            y: Labels. Used by the sample split, and by the user split when
                ``self.label`` has not been set.

        Keyword Args:
            train_data_fraction: Fraction assigned to the training part;
                defaults to ``self.train_data_fraction``.
            type: ``'sample'`` for a per-sample split; any other value gives
                a per-user split. Defaults to ``self.split_type``.

        Returns:
            ``(train_x, test_x, train_y, test_y)``.
        """
        train_data_fraction = kwargs.get('train_data_fraction', self.train_data_fraction)
        # Default to split_type; self.type is the *distribution* type ('iid')
        # and never equals 'sample'.
        if kwargs.get('type', self.split_type) == 'sample':
            return self._sample_split(x, y, train_data_fraction)
        return self._user_split(train_data_fraction, y)

    def _user_split(self, train_data_fraction, labels=None):
        """Assign whole users (entries of ``self.data``) to train or test.

        Args:
            train_data_fraction: Fraction of the ``self.client_no`` users that
                go to the training part.
            labels: Per-user labels, used only when ``self.label`` is unset.

        Returns:
            ``(train_user_files, test_user_files, train_labels, test_labels)``.

        Raises:
            ValueError: If no labels are available from either source.
        """
        own_labels = getattr(self, 'label', None)
        if own_labels is not None:
            labels = own_labels
        if labels is None:
            raise ValueError(
                'No labels available for a user split: set `label` on the '
                'instance or pass labels to split_data.')

        # A non-negative configured seed keeps the split reproducible;
        # otherwise the current time is used.
        rng_seed = (self.random_split_seed if (self.random_split_seed is not None and self.random_split_seed >= 0)
                    else int(time.time()))
        rng = random.Random(rng_seed)
        # randomly sample from user_files to pick training set users
        num_users = self.client_no
        num_train_users = int(train_data_fraction * num_users)
        train_indices = set(rng.sample(list(range(num_users)), num_train_users))

        train_user_files = []
        test_user_files = []
        train_labels = []
        test_labels = []
        for i in range(num_users):
            if i in train_indices:
                train_user_files.append(self.data[i])
                train_labels.append(labels[i])
            else:
                test_user_files.append(self.data[i])
                test_labels.append(labels[i])

        return train_user_files, test_user_files, train_labels, test_labels

    def _sample_split(self, x, y, train_data_fraction):
        """Split ``x`` and ``y`` per sample via ``train_test_split``.

        NOTE(review): ``train_test_split`` is imported outside this class;
        presumably scikit-learn's - confirm.
        """
        return train_test_split(x, y, train_size=train_data_fraction)


#DATA variable


    # Kind of data to load and where to load it from; both are passed to Reader below.
    data_type = 'text'
    input_path = '/content/drive/MyDrive/Divya-Yasaman/v2/data/text/topics_sample'  # accepts either folder or csv file
    
    # Reader is not defined in this snippet.
    obj = Reader(data_type, input_path)
    
    # NOTE(review): `%%time` is an IPython cell magic, so the lines from here on
    # presumably belong to a separate notebook cell - confirm.
    %%time
    data = obj.read_data()
  

#function DEFINITION which gives the error

def convert_to_client_data(data, data_type, **kwargs):
    """Distribute ``data`` across clients and wrap it as TFF client data.

    Args:
        data: Dataset to distribute; handed to ``Distribute``.
        data_type: Data kind, passed to both ``Distribute`` and
            ``distribute_data``.
        **kwargs: Extra options forwarded to ``Distribute.distribute_data``.

    Returns:
        The ``tff.simulation.datasets.TestClientData`` built from a mapping of
        ``"client_<i>"`` to ``OrderedDict([('data', <i-th group>)])``.
    """
    distributor_obj = Distribute(data, data_type)

    distributed_data = distributor_obj.distribute_data(data_type=data_type, **kwargs)

    client_train_dataset = collections.OrderedDict()

    for i, client_samples in enumerate(distributed_data):
        client_name = "client_" + str(i)
        # OrderedDict, like dict, takes ONE positional argument: a mapping or
        # an iterable of (key, value) pairs. Passing the key and the value as
        # two positional arguments raises
        # "TypeError: expected at most 1 arguments, got 2".
        client_train_dataset[client_name] = collections.OrderedDict(
            [('data', client_samples)])

    print(f'Converting data to {len(distributed_data)} client data...')

    train_dataset = tff.simulation.datasets.TestClientData(client_train_dataset)

    print(f'Data successfully converted to {len(distributed_data)} client data.')

    return train_dataset

ERROR STATEMENT for the function definition

<decorator-gen-53> in time(self, line, cell, local_ns)

<timed exec> in <module>()

<ipython-input-60-7b390d37230c> in convert_to_client_data(data, data_type, **kwargs)
     13     for i in range(len(distributed_data)):
     14         client_name = "client_" + str(i)
---> 15         data = collections.OrderedDict('data', distributed_data[i])
     16        # data = collections.OrderedDict( distributed_data[i])
     17         client_train_dataset[client_name] = data

TypeError: expected at most 1 arguments, got 2

Solution

  • collections.OrderedDict() takes the same arguments as dict(): at most one positional argument (a mapping or an iterable of key/value pairs), optionally followed by keyword arguments. It doesn't take the key and value as separate positional arguments.

    If 'data' is supposed to be the key, pass it together with its value as a key/value pair rather than as a separate argument:

    data = collections.OrderedDict([('data', distributed_data[i])])
    

    Also, regular dictionaries are guaranteed to retain their insertion order as of Python 3.7 (in CPython 3.6 this was an implementation detail), so you may not need to use OrderedDict. Just write:

    data = {'data': distributed_data[i]}