python numpy

How to find indexes of similar values for each row in an Nx2 array?


Suppose a continuous variable A changes slowly over time, and each row of an Nx2 array logs its value together with a noise value, in no fixed column order. How can I find, for each row, which of the two values belongs to A?

We can assume that A does not change very quickly, which means the values of A in two adjacent rows are much closer to each other than they are to the noise values.

For example, for matrix = [[-7, 3], [3.9, 8], [-3, 5.5], [5.2, 10]], the expected indexes are [1, 0, 1, 0], which select the values [3, 3.9, 5.5, 5.2] that belong to A.

Thanks in advance!


Solution

  • import numpy as np
    
    def find_continuous_variable(matrix,A_pos):
        """Follow a slowly varying signal through the rows of a matrix.

        The walk starts from the value stored in column ``A_pos`` of the first
        row. For every row (the first one included) the column whose value is
        closest to the previously selected value is chosen, and that value
        becomes the reference for the next row.

        Args:
            matrix: Rectangular sequence of numeric rows (anything
                ``np.asarray`` converts to a 2-D array).
            A_pos: Column of the first row that holds the signal.

        Returns:
            A list with one plain ``int`` column index per row. An empty
            ``matrix`` yields an empty list.
        """
        data = np.asarray(matrix)
        if data.size == 0:
            # Nothing to track; indexing the first row would raise otherwise.
            return []

        indexes = []
        current_A = data[0, A_pos]
        for row in data:
            # int() turns the NumPy scalar into a builtin int so the result
            # prints as [1, 0, ...] regardless of the NumPy version.
            min_index = int(np.argmin(np.abs(row - current_A)))
            indexes.append(min_index)
            current_A = row[min_index]

        return indexes
    
    
    # Demo: track the signal starting from column 1 of the first row,
    # once on a 3-column matrix and once on the 2-column matrix.
    for matrix in (
        [[-7, 3, 5], [3.9, 8, 6], [-3, 5.5, 0], [5.2, 10, 4]],  # expected: [1, 0, 1, 0]
        [[-7, 3], [3.9, 8], [-3, 5.5], [5.2, 10]],  # expected: [1, 0, 1, 0]
    ):
        result = find_continuous_variable(matrix, 1)
        print(result)
    

    import numpy as np
    
    def find_continuous_variable(matrix):
        """Pick the smoothest greedy path through the rows of a matrix.

        Every value of the first row is tried as the starting point. From each
        start, the walk selects in every row the column closest to the
        previously selected value and sums the absolute jumps. The index list
        of the walk with the smallest total jump is returned; on ties the
        earliest starting column wins.

        Args:
            matrix: Rectangular sequence of numeric rows (anything
                ``np.asarray`` converts to a 2-D array).

        Returns:
            A list with one plain ``int`` column index per row. An empty
            ``matrix`` yields an empty list.
        """
        data = np.asarray(matrix)
        if data.size == 0:
            # No rows or no columns: there is no path to report.
            return []

        # Start from +inf rather than a magic bound so that a candidate is
        # always accepted, however large the data values are.
        min_total_dif = np.inf
        min_ans = []

        for current_A in data[0]:
            now_total_dif = 0.0
            indexes = []

            for row in data:
                differences = np.abs(row - current_A)
                min_index = int(np.argmin(differences))
                now_total_dif += float(differences[min_index])
                indexes.append(min_index)
                current_A = row[min_index]

            # Strict comparison keeps the first of several equally good paths.
            if now_total_dif < min_total_dif:
                min_total_dif = now_total_dif
                min_ans = indexes

        return min_ans
    
    
    # Demo: let the function choose the starting column itself.
    for matrix in (
        # expected: [2, 2, 1, 0], since [5,6,5.5,5.2] is a better answer than [3, 3.9, 5.5, 5.2]
        [[-7, 3, 5], [3.9, 8, 6], [-3, 5.5, 0], [5.2, 10, 4]],
        # expected: [1, 0, 1, 0]
        [[-7, 3], [3.9, 8], [-3, 5.5], [5.2, 10]],
    ):
        result = find_continuous_variable(matrix)
        print(result)
    

    import numpy as np
    
    def find_continuous_variable(matrix):
        """Track a slowly varying signal, seeding from the first two rows.

        The starting column is the one whose value changes least between the
        first and the second row (columns are compared position by position).
        From that seed, every row contributes the column whose value is closest
        to the previously selected value.

        Args:
            matrix: Rectangular sequence of numeric rows (anything
                ``np.asarray`` converts to a 2-D array).

        Returns:
            A list with one plain ``int`` column index per row. An empty
            ``matrix`` yields an empty list. With a single row there is no
            second row to compare against, so column 0 is used as the seed.
        """
        data = np.asarray(matrix)
        if data.size == 0:
            return []

        if len(data) > 1:
            # NOTE(review): this only compares a column with itself one row
            # later; it does not consider the signal switching columns between
            # the first two rows.
            initial_diffs = np.abs(data[1] - data[0])
            initial_index = int(np.argmin(initial_diffs))
        else:
            # A lone row offers nothing to compare with; avoid indexing data[1].
            initial_index = 0

        indexes = []
        current_A = data[0, initial_index]
        for row in data:
            # int() keeps the result a list of builtin ints, not NumPy scalars.
            min_index = int(np.argmin(np.abs(row - current_A)))
            indexes.append(min_index)
            current_A = row[min_index]

        return indexes
    
    # Demo: the starting column is derived from the first two rows.
    for matrix in (
        # expected: [2, 2, 1, 0], since [5,6,5.5,5.2] is a better answer than [3, 3.9, 5.5, 5.2]
        [[-7, 3, 5], [3.9, 8, 6], [-3, 5.5, 0], [5.2, 10, 4]],
        # expected: [1, 0, 1, 0]
        [[-7, 3], [3.9, 8], [-3, 5.5], [5.2, 10]],
    ):
        result = find_continuous_variable(matrix)
        print(result)
    

    EDIT: If your data is always an Nx2 array, simply run the first function twice, once with A_pos set to 0 and once with A_pos set to 1.