Thursday, 17 October 2019

TensorFlow: Draw the Loss Landscape of a Single Neuron That Learns OR

A single neuron with bias, sigmoid activation, and 2 inputs, trained on data from the OR truth table. The code below produces the loss landscape plot, with the gradient descent path drawn on top of it.
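
The neuron computes Out = sigmoid(W1*X1 + W2*X2 + B). A minimal standalone sketch of that forward pass, with hypothetical weight values chosen by hand just to show the idea:

import numpy as np;

Sigmoid = lambda V: 1/(1 + np.exp(-V));
W1,W2,B = 5.0, 5.0, -2.5; #hypothetical values, not from training
for X1,X2 in [(0,0),(0,1),(1,0),(1,1)]:
  print(X1,X2, Sigmoid(W1*X1 + W2*X2 + B));
#prints ~0.08 for (0,0) and >0.92 for the other rows, i.e. OR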

Source code:
%tensorflow_version 2.x
%reset -f

#libs
import tensorflow        as tf;
import numpy             as np;
import matplotlib.pyplot as pp;

from mpl_toolkits import mplot3d; #registers the "3d" projection used below

#constants
BSIZE = 4; #batch size = all 4 rows of the OR truth table

#model
class model(tf.Module):
  def __init__(this):
    super().__init__();
    this.W = tf.Variable(tf.random.uniform([2,1], -1,1)); #2 input weights
    this.B = tf.Variable(tf.random.uniform([  1], -1,1)); #bias

  @tf.function(input_signature=[tf.TensorSpec([BSIZE,2])]) #traced graph, fixed to [4,2] float32 inputs
  def __call__(this,Inp):
    return tf.sigmoid(tf.matmul(Inp,this.W) + this.B);

  #ff: same forward pass as __call__, but eager and without the fixed
  #input signature, so it still works after W is replaced by a plain
  #tensor in the landscape sweep below
  def ff(this,Inp):
    Out = tf.sigmoid(tf.matmul(Inp,this.W) + this.B);
    return Out;

#data: the OR truth table; Y is the OR of the two columns of X
X = tf.convert_to_tensor([[0,0],[0,1],[1,0],[1,1]], tf.float32);
Y = tf.convert_to_tensor([[0],  [1],  [1],  [1]  ], tf.float32);

#train
Model = model();
Loss  = tf.losses.MeanAbsoluteError(); #mean |Y-Out| over the batch
Optim = tf.optimizers.SGD(1e-1);       #plain gradient descent, learning rate 0.1
Steps = 5000;
Xyz   = []; #(w1,w2,loss) snapshots taken during training, plotted at the end
#''' #delete the leading # here to skip the whole training block below
for I in range(Steps):
  if I%(Steps//10)==0: #log 10 snapshots during training
    Out       = Model(X);
    Lossvalue = Loss(Y,Out);
    print("Loss:",Lossvalue.numpy());
    Xyz += [[Model.W.numpy()[0,0],Model.W.numpy()[1,0],Lossvalue.numpy()]];

  #one SGD step: tape the forward pass, then differentiate the loss
  #w.r.t. W and B and apply the updates
  with tf.GradientTape() as T:
    Out       = Model(X);
    Lossvalue = Loss(Y,Out);

  Grads = T.gradient(Lossvalue, Model.trainable_variables);
  Optim.apply_gradients(zip(Grads, Model.trainable_variables));

Out       = Model(X);
Lossvalue = Loss(Y,Out);
print("Loss:",Lossvalue.numpy(),"(Last)");
Xyz += [[Model.W.numpy()[0,0],Model.W.numpy()[1,0],Lossvalue.numpy()]];

print("\nWeights of optimum:");
W = tf.keras.backend.flatten(Model.W).numpy();
print(W);
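#for OR the optimum needs B < 0 (so sigmoid(B) is near 0) and
#W1,W2 > -B (so the other three rows land near 1), hence W1,W2 > 0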
#'''
#loss landscape
D  = 50;
P  = np.linspace(-10,10, D); #grid coordinates shared by both weight axes
W1 = [];
W2 = [];
for I in range(D):
  W1 += [[]];
  W2 += [[]];  
  for J in range(D):
    W1[I] += [P[I]];
    W2[I] += [P[J]];
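#the nested loops above are just an explicit
#  W1, W2 = np.meshgrid(P, P, indexing="ij")
#i.e. W1[I][J]==P[I] and W2[I][J]==P[J]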

print("\nW1",W1);
print("W2",W2);
Z  = [];

#sweep the grid: for each (w1,w2) pair overwrite W and measure the
#loss; the trained bias B stays fixed, so Z is a 2-D slice of the
#full (w1,w2,b) loss surface
for I in range(D):
  Zrow = [];
  for J in range(D):
    #assigning a plain tensor replaces the Variable, so no training is
    #possible afterwards, and the traced __call__ would still read the
    #old Variable; hence Model.ff instead of Model(X)
    Model.W = tf.convert_to_tensor([[P[I]],[P[J]]], tf.float32);
    Out     = Model.ff(X);
    Lossval = Loss(Y,Out).numpy();
    Zrow   += [Lossval];

  Z += [Zrow];

print("Z:",Z);
Z = np.array(Z); #plot_wireframe needs Z as a 2-D ndarray

pp.figure(figsize=(8,8));
pp3d = pp.axes(projection="3d",elev=10,azim=10);
pp3d.text(0,0,0,"(0,0,0)");
pp3d.text(W[0],W[1],0,"Optimum");

pp3d.plot([0,10],[0,0],"-r");
pp3d.plot([0,0],[0,10],"-g");
pp3d.plot([0,0],[0,0],[0,1],"-b");
pp3d.plot([W[0]],[W[1]],[0],"yo");

pp3d.set_title("Loss Landscape");
pp3d.set_xlabel("Weight1");
pp3d.set_ylabel("Weight2");
pp3d.set_zlabel("Loss");
pp3d.plot_wireframe(W1,W2,Z, cmap="coolwarm");
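#note: cmap has no effect on a wireframe (single-color lines); a filled
#  pp3d.plot_surface(np.array(W1),np.array(W2),Z, cmap="coolwarm")
#would color the surface by loss value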

#gradient descent curve: replay the (w1,w2,loss) snapshots as a red path
W1s = [];
W2s = [];
Ls  = [];
for I in range(len(Xyz)):
  W1s += [Xyz[I][0]];
  W2s += [Xyz[I][1]];
  Ls  += [Xyz[I][2]];  

pp3d.plot(W1s,W2s,Ls,"-ro");
pp.show(); #needed when run as a plain script; notebooks render the figure inline
#eof
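
For reference, the same D x D loss grid can be computed without the Python double loop by broadcasting over all weight pairs at once. This is only a sketch, not part of the listing above; it assumes X, Y, P, D and the trained Model.B from the code are still in scope:

W1g,W2g = np.meshgrid(P,P, indexing="ij");
Wgrid   = np.stack([W1g.ravel(),W2g.ravel()], axis=1)[:,:,None]; #[D*D,2,1]
Sig     = lambda V: 1/(1 + np.exp(-V));
Outs    = Sig(X.numpy() @ Wgrid + Model.B.numpy());              #[D*D,4,1]
Zalt    = np.mean(np.abs(Y.numpy()-Outs), axis=(1,2)).reshape(D,D); #equals Z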
