python tensorflow computer-vision mnist adversarial-machines

Tensorflow - numpy gradient check doesnt work

I'm trying to estimate the gradient of a function by the finite difference method : finite difference method for estimating gradient

TLDR:

grad f(x) = [f(x+h)-f(x-h)]/(2h) for sufficiently small h.

this is also used in the gradient check phase to check your backpropagation in AI as you might know.

This is my network :

def defineModel():
  global model
  model = Sequential()
  model.add(keras.Input(shape=input_shape))
  model.add(layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
  model.add(layers.MaxPooling2D(pool_size=(2, 2)))
  model.add(layers.Conv2D(64, kernel_size=(3, 3), activation="relu"))
  model.add(layers.MaxPooling2D(pool_size=(2, 2)))
  model.add( layers.Flatten())
  model.add(layers.Dropout(0.5))
  model.add(layers.Dense(num_classes, activation="softmax"))
  model.build()
  model.summary()

This part is fine and has no bugs. I just mentioned it here so you have a sense of my model. I work on MNIST so everything is pretty straightforward. With 1 epoch and a few lines of the TF code, I reached +98% accuracy which is quite good for a makeshift model.

Since I'm doing adversarial training, I want the gradient of my loss with respect to the input data:

by the way, I used the tiling idea:

if you cover the input image with square tiles of (tile*tile) dimension with no overlapping, you can assume the gradient of the image within tiles is pretty much constant so it's a reasonable approximation. But as a matter of debugging, in my code tile=1 so we're calculating the pixel-wise gradient.

and this is where the problem lies, but I cannot figure out where! I controlled the loss for loss(x+h) and loss(x-h) and loss(x) are pretty much in a close range so I know it is fine. also my TF automatic backpropagation works fine I have tested it. the problem must be with the way of computing manual gradient.

tile=1
h=1e-4 #also tried 1e-5, 1e-6 but did not work

#A dummy function to wait
def w():
  print()
  ww=input('wait')
  print()

#This function works fine.
def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

#the problem is here. given an index suah is idx, I'm going to manipulate
#x_test[idx] and compute loss gradient with respect to this input
def estimateGrad(idx):
  y=model(np.expand_dims(x_test[idx],axis=0))
  y=np.expand_dims(y_train[idx],axis=0)
  x=np.squeeze(x_test[idx])

  

  cce = tf.keras.losses.CategoricalCrossentropy()
  grad=np.zeros((28,28))
  num=int(28/tile)

  #MNIST pictures are 28*28 pixels. Now grad is 28*28 nd.array
  #and x is input image converted to 28*28 nd.array
  for i in range(num):
    for j in range(num):

      plus=copy.deepcopy(x)
      minus=copy.deepcopy(x)
      #Now plus is x+h and minus is x-h
      plus[i*tile:(i+1)*tile , j*tile:(j+1)*tile]=  plus[i*tile:(i+1)*tile , j*tile:(j+1)*tile]+h
      minus[i*tile:(i+1)*tile , j*tile:(j+1)*tile]=  minus[i*tile:(i+1)*tile , j*tile:(j+1)*tile]-h

       
      plus=np.expand_dims(plus,axis=(0,-1))
      minus=np.expand_dims(minus,axis=(0,-1))

      #Now we pass plus and minus to model prediction in the next two lines
      plus=model(plus)
      minus=model(minus)
      
      #Since I want to find gradient of loss with respect to x, in the next 
      #two lines I will set plus=loss(x+h) and minus=loss(x-h)
      #In other words, here our finction f which we want to cumpute its grads
      #is the loss function

      plus=cce(y,plus).numpy()
      minus=cce(y,minus).numpy()

      #Applying the formola : grad=(loss(x+h)-loss(x-h))/2/h
      grad[i*tile:(i+1)*tile , j*tile:(j+1)*tile]=(plus-minus)/2/h

 #Ok now lets check our grad with TF autograd module
  x= tf.convert_to_tensor(np.expand_dims(x_test[idx], axis=0)) 
  with tf.GradientTape() as tape:
    tape.watch(x) 
    y=model(x)
    y_expanded=np.expand_dims(y_train[idx],axis=0)  
    loss=cce(y_expanded,y)

     
  
  delta=tape.gradient(loss,x)
  delta=delta.numpy()
  delta=np.squeeze(delta)

  #delta is gradients returned by TF via auto-differentiation.
  #We calculate the error and unfortunately its large
  diff=rel_error(grad,delta)

  print('diff ',diff)
  w()
  #problem : diff is very large. it should be less than 1e-4

you can refer here for my full code.

Solution

I replaced the gradient estimation code with my own solution gradient and the code works now. Calculating errors can be tricky. As on can see on the histogram (note the log-scale) at the bottom, for most pixels the relative error is smaller than 10^-4 but where the gradient is close to zero, the relative error explodes. The problem with max(rel_err) and mean(rel_err) is, that they are both easily perturbed by outlier pixels. Better measures for if the order of magnitude is most relevant are the geometric mean and median over all non-zero pixels.

Imports

import tensorflow as tf
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import time
import copy

Gradient

# Gradient
def gradient(f, x, h, **kwargs):
    shape = x.shape    
    x = x.ravel()
    out = np.zeros_like(x)
    hs = np.zeros_like(x)
    for i, _ in enumerate(x):
        hs *= 0.
        hs[i] = h
        out[i] = (f((x + hs).reshape(shape), **kwargs) - f((x - hs).reshape(shape), **kwargs)) / (2*h)

    return out.reshape(shape)

Model definition etc.

Not my code.

x_train=y_train=x_test=y_test=None
num_classes = 10
input_shape = (28, 28, 1)
batch_size= 128
epochs=1
h=1e-4
epsilon=1e-3
tile=1

model=None

def testImage(clean_img,adv_image,truth,pred):

    # image = np.squeeze(x_test[index])
    # # plot the sample
    # fig = plt.figure
    # plt.imshow(image, cmap='gray')
    # plt.show()

    # x= np.expand_dims(x_test[index], axis=0)
    # y=model(x)
    # print('model prediction clean example : ',np.argmax(y))
    # print('the ground truth : ',y_train[index])

    print("clean image : \n")
    fig=plt.figure
    plt.imshow(clean_img, cmap='gray')
    plt.show()

    print("adv image : \n")
    fig=plt.figure
    plt.imshow(adv_image, cmap='gray')
    plt.show()

    print('model prediction was : ',pred)
    print('truth was :',truth)

    print('\n\n')


def prepareDataset():
    global x_train,y_train,x_test,y_test
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
    x_train = x_train.astype("float32") / 255
    x_test = x_test.astype("float32") / 255
    x_train = np.expand_dims(x_train, -1)
    x_test = np.expand_dims(x_test, -1)
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes) 


def defineModel():
    global model
    model = Sequential()
    model.add(keras.Input(shape=input_shape))
    model.add(layers.Conv2D(32, kernel_size=(3, 3), activation="relu"))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(layers.Conv2D(64, kernel_size=(3, 3), activation="relu"))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    model.add( layers.Flatten())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_classes, activation="softmax"))
    model.build()
    model.summary()


def trainModel():
    global model
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)


def evalModel():
    score = model.evaluate(x_test, y_test, verbose=0)
    print("Test loss:", score[0])
    print("Test accuracy:", score[1])

def fgsm(epsilon,index):
    x = tf.convert_to_tensor(np.expand_dims(x_test[index], axis=0)) 
    with tf.GradientTape() as tape:
        tape.watch(x)

        cce = tf.keras.losses.CategoricalCrossentropy()
        y = model(x)
        y_expanded = np.expand_dims(y_train[index],axis=0)
        #y=np.squeeze(tf.transpose(y))
        loss = cce(y_expanded,y)
        delta = tape.gradient(loss,x)   
        perturbation=epsilon*tf.math.sign(delta)
        return perturbation


def w():
    print()
    ww=input('wait')
    print()


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

Estimate the Gradient

def estimateGrad(idx):

    cce = tf.keras.losses.CategoricalCrossentropy()

    x = x_test[idx]
    y = y_test[idx]

    def f(x, y):
        y_pred = model(np.expand_dims(x,axis=0))
        return cce(y, y_pred[0])

    grad = gradient(f, x, 1e-3, y=y)

    tfx = tf.convert_to_tensor(np.expand_dims(x,axis=0)) 
    with tf.GradientTape() as tape:
        tape.watch(tfx) 
        y_pred = model(tfx)
        loss=cce(y, y_pred[0])

        delta = tape.gradient(loss, tfx)
        delta = delta.numpy()
        delta = np.squeeze(delta)

    return grad, delta

Call everything

So technically this works. Unfortunately h < 1/1000 runs into numerical issues, so this is the best we can achieve with finite differences.

prepareDataset()
trainModel()
grad, delta = estimateGrad(2)
abs_err = abs(delta - grad).max()
rel_err = (abs(delta - grad / np.clip(delta, 1e-8, np.infty)))

print('Absolute Error: ', abs_err)
print('Median Relative Error: ', np.median(rel_err))
print('Mean Relative Error: ', np.mean(rel_err))
print('Geometric Mean Relative Error: ', gmean(rel_err[rel_err > 0].ravel()))
print('Max Relative Error: ', np.max(rel_err))   

fig, ax = plt.subplots(1, 4, figsize=(20, 5))

ax[0].imshow(grad)
ax[0].set_title('Discrete gradient')

ax[1].imshow(delta)
ax[1].set_title('Analytic gradient')

ax[1].set_title('Relative errorr')
ax[2].imshow(rel_err + 1)

ax[3].set_title('Histogram log-relative error')
logbins = np.geomspace(1e-8, rel_err.max() + 1, 8)
ax[3].hist(rel_err, bins=logbins)
ax[3].set_xscale('log')
plt.show()

Output:

Absolute Error:  0.0001446546
Median Relative Error:  8.748577e-06
Mean Relative Error:  1781.9397
Geometric Mean Relative Error:  0.009761388
Max Relative Error:  53676.195