What is a faster, NumPythonic way to do this normalization?
import numpy as np

def normalize_vector(x, b, axis):
    """
    Normalize a real vector x and output an integer vector y.

    Parameters:
        x (numpy.ndarray): Input real vector, shape (batch_size, seq_len).
        b (int): Unsigned integer defining the scaling factor.
        axis (int/None): if None, normalize the flattened array; if axis=-1,
            perform relative normalization across the batch.

    Returns:
        numpy.ndarray: Integer vector y.
    """
    # Find the maximum absolute value in x
    m = np.max(np.abs(x))
    # Process each element in x
    y = []
    for xi in x:
        if xi > 0:
            y.append(int((2**b - 1) * xi / m))
        elif xi < 0:
            y.append(int(2**b * xi / m))
        else:
            y.append(0)
    return np.array(y)
Can np.digitize make it faster? I have a similar question, but it's not about NumPy. I also expect it to support the axis parameter for batched vectors: with axis=-1, each row of a (batch_size, seq_len) array should be scaled by its own row maximum.
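To make the expected behavior concrete, here is a tiny example with b=7 (the input values are just my illustration):

x = np.array([1.0, -0.5, 0.0])
print(normalize_vector(x, 7, None))  # expected: [127 -64 0]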
There is np.piecewise, which transforms data based on multiple conditions:
def normalize_vector2(x, b, axis):
    # Step 1: find the maximum absolute value in x (keepdims so the
    # division broadcasts for both axis=None and axis=-1)
    m = np.max(np.abs(x), axis=axis, keepdims=True)
    m[m == 0] = 1  # avoid division by zero
    # Scale first: np.piecewise hands each function a flat selection of
    # elements, so a per-row m could not be applied inside the lambdas
    scaled = x / m
    y = np.piecewise(scaled, [scaled > 0, scaled < 0],
                     [
                         lambda xi: (2**b - 1) * xi,
                         lambda xi: 2**b * xi
                     ])
    return y.astype(int)
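For instance, on a small 2x2 batch (numbers are illustrative only):

x = np.array([[0.5, -1.0], [2.0, -0.25]])
print(normalize_vector2(x, 7, None))  # global max:  [[31, -64], [127, -16]]
print(normalize_vector2(x, 7, -1))    # per-row max: [[63, -128], [127, -16]]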
If the two branches are this close, you can simplify them into a single expression by folding the condition into the multiplier:
def normalize_vector3(x, b, axis):
    # Step 1: find the maximum absolute value in x
    m = np.max(np.abs(x), axis=axis, keepdims=True)
    m[m == 0] = 1  # avoid division by zero
    # 1 * (x > 0) binds before the subtraction, so the factor is
    # 2**b - 1 where x > 0 and 2**b everywhere else
    y = (2**b - 1 * (x > 0)) * x / m
    return y.astype(int)
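If the boolean arithmetic reads as too clever, an equivalent spelling with np.where computes the same thing (the _where suffix is just my illustrative name):

def normalize_vector3_where(x, b, axis):
    m = np.max(np.abs(x), axis=axis, keepdims=True)
    m[m == 0] = 1
    scale = np.where(x > 0, 2**b - 1, 2**b)  # per-element multiplier
    return (scale * x / m).astype(int)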
Comparison:
import numpy as np
import time

def normalize_vector2(x, b):
    # Step 1: find the maximum absolute value in x
    m = np.max(np.abs(x))
    y = np.piecewise(x, [x > 0, x < 0],
                     [
                         lambda xi: (2**b - 1) * xi / m,
                         lambda xi: 2**b * xi / m
                     ])
    return y.astype(int)

def normalize_vector3(x, b, axis):
    # Step 1: find the maximum absolute value in x
    m = np.max(np.abs(x), axis=axis, keepdims=True)
    m[m == 0] = 1
    y = (2**b - 1 * (x > 0)) * x / m
    return y.astype(int)

def normalize_vector(x, b):
    # Find the maximum absolute value in x
    m = np.max(np.abs(x))
    # Process each element in x
    y = []
    for xi in x:
        if xi > 0:
            y.append(int((2**b - 1) * xi / m))
        elif xi < 0:
            y.append(int(2**b * xi / m))
        else:
            y.append(0)
    return np.array(y)

for elements in [10, 100, 1000, 10000]:
    iterations = int(100000 / elements)
    x = np.random.random(elements) * 256 - 128
    t1 = time.time()
    for i in range(iterations):
        normalize_vector(x, 7)
    t2 = time.time()
    for i in range(iterations):
        normalize_vector2(x, 7)
    t3 = time.time()
    for i in range(iterations):
        normalize_vector3(x, 7, 0)
    t4 = time.time()
    print(f"{(t2-t1)/iterations:.7f}, {elements} elements python")
    print(f"{(t3-t2)/iterations:.7f}, {elements} elements numpy")
    print(f"{(t4-t3)/iterations:.7f}, {elements} elements numpy maths")
0.0000109, 10 elements python
0.0000331, 10 elements numpy
0.0000158, 10 elements numpy maths
0.0000589, 100 elements python
0.0000399, 100 elements numpy
0.0000168, 100 elements numpy maths
0.0005812, 1000 elements python
0.0000515, 1000 elements numpy
0.0000255, 1000 elements numpy maths
0.0045110, 10000 elements python
0.0003255, 10000 elements numpy
0.0001083, 10000 elements numpy maths
NumPy is slower than pure Python for small arrays (mostly < 50 elements), where the fixed per-call overhead dominates the actual work.
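As an aside, wall-clock deltas from time.time() are noisy at this scale; the standard timeit module gives steadier per-call figures. A minimal sketch for the fastest variant (array size chosen arbitrarily):

import timeit

x = np.random.random(10000) * 256 - 128
per_call = timeit.timeit(lambda: normalize_vector3(x, 7, 0), number=100) / 100
print(f"{per_call:.7f} s per call")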