tensorflow autodiff

Calculating Hessian with tensorflow gradient tape


Thank you for your interest in this issue.

I want to calculate hessian matrix of tensorflow.keras.Model

For higher-order derivatives, I tried a nested GradientTape.

# example graph, and inputs

xs = tf.constant(tf.random.normal([100,24]))

ex_model = Sequential()
ex_model.add(Input(shape=(24)))
ex_model.add(Dense(10))
ex_model.add(Dense(1))

with tf.GradientTape(persistent=True) as tape:
    tape.watch(xs)
    ys = ex_model(xs)
g = tape.gradient(ys, xs)
h = tape.jacobian(g, xs)
print(g.shape)
print(h.shape)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-20-dbf443f1ddab> in <module>
      5 h = tape.jacobian(g, xs)
      6 print(g.shape)
----> 7 print(h.shape)

AttributeError: 'NoneType' object has no attribute 'shape'

And another attempt:

with tf.GradientTape() as tape1:
    with tf.GradientTape() as tape2:
        tape2.watch(xs)
        ys = ex_model(xs)
    g = tape2.gradient(ys, xs)
h = tape1.jacobian(g, xs)
    
print(g.shape)
print(h.shape)


(100, 24)

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-17-c5bbb17404bc> in <module>
      7 
      8 print(g.shape)
----> 9 print(h.shape)

AttributeError: 'NoneType' object has no attribute 'shape'

Why can't I calculate the gradient of g wrt xs?


Solution

  • You are already calculating the second-order gradients of ys wrt xs, and they are zero — as they should be, since the first-order gradient g is constant here (the model has no nonlinear activations). TensorFlow returns None rather than a zero tensor when you differentiate a constant, and that is why tape1.jacobian(g, xs) returns None.

    When the first-order gradient is not constant, the second-order gradient exists:

    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Input, Dense
    
    x = tf.Variable(1.0)
    w = tf.constant(3.0)
    with tf.GradientTape() as t2:
      with tf.GradientTape() as t1:
        y = w * x**3
      dy_dx = t1.gradient(y, x)
    d2y_dx2 = t2.gradient(dy_dx, x)
    
    print('dy_dx:', dy_dx) # 3 * 3 * x**2 => 9.0
    print('d2y_dx2:', d2y_dx2) # 9 * 2 * x => 18.0
    

    Outputs:

    dy_dx: tf.Tensor(9.0, shape=(), dtype=float32)
    d2y_dx2: tf.Tensor(18.0, shape=(), dtype=float32)
    

    When the first-order gradient is constant, the second-order gradient comes back as None:

    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Input, Dense
    
    x = tf.Variable(1.0)
    w = tf.constant(3.0)
    with tf.GradientTape() as t2:
      with tf.GradientTape() as t1:
        y = w * x
      dy_dx = t1.gradient(y, x)
    d2y_dx2 = t2.gradient(dy_dx, x)
    
    print('dy_dx:', dy_dx)
    print('d2y_dx2:', d2y_dx2)
    

    Outputs:

    dy_dx: tf.Tensor(3.0, shape=(), dtype=float32)
    d2y_dx2: None
    

    You could, however, calculate the second-order gradients of the layer parameters wrt xs — as is done in, e.g., input gradient regularization.