A single neuron with bias and sigmoid activation, two inputs, trained on data from the OR truth table, followed by a plot of the loss landscape.
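The neuron computes Out = sigmoid(X.W + B), so training should push the four OR rows toward [0,1,1,1]. For intuition, a minimal NumPy sketch of this forward pass (the weight values 5, 5 and bias -2 are illustrative picks, not the trained ones):

import numpy as np;
def sigmoid(Z):
  return 1/(1 + np.exp(-Z));
Xor = np.array([[0,0],[0,1],[1,0],[1,1]], np.float32); #OR truth table inputs
Wex = np.array([[5.0],[5.0]]); #illustrative weights
Bex = np.array([-2.0]);        #illustrative bias
print(sigmoid(Xor @ Wex + Bex)); #about [0.12, 0.95, 0.95, 1.0], i.e. OR-like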
Source code:
%tensorflow_version 2.x
%reset -f
#libs
import tensorflow as tf;
import numpy as np;
import matplotlib.pyplot as pp;
from mpl_toolkits import mplot3d;
#constants
BSIZE = 4;
#model
class model(tf.Module):
  def __init__(this):
    super().__init__();
    this.W = tf.Variable(tf.random.uniform([2,1], -1,1));
    this.B = tf.Variable(tf.random.uniform([ 1], -1,1));
  @tf.function(input_signature=[tf.TensorSpec([BSIZE,2])])
  def __call__(this,Inp):
    return tf.sigmoid(tf.matmul(Inp,this.W) + this.B);
  #plain forward pass without the fixed input signature; used later,
  #after this.W has been replaced by an ordinary tensor
  def ff(this,Inp):
    Out = tf.sigmoid(tf.matmul(Inp,this.W) + this.B);
    return Out;
#data
X = tf.convert_to_tensor([[0,0],[0,1],[1,0],[1,1]], tf.float32);
Y = tf.convert_to_tensor([[0], [1], [1], [1] ], tf.float32);
#train
Model = model();
Loss = tf.losses.MeanAbsoluteError();
Optim = tf.optimizers.SGD(1e-1);
Steps = 5000;
Xyz = [];
#'''
for I in range(Steps):
  #log loss and current weights every Steps//10 iterations
  if I%(Steps//10)==0:
    Out = Model(X);
    Lossvalue = Loss(Y,Out);
    print("Loss:",Lossvalue.numpy());
    Xyz += [[Model.W.numpy()[0,0],Model.W.numpy()[1,0],Lossvalue.numpy()]];
  with tf.GradientTape() as T:
    Out = Model(X);
    Lossvalue = Loss(Y,Out);
  Grads = T.gradient(Lossvalue, Model.trainable_variables);
  Optim.apply_gradients(zip(Grads, Model.trainable_variables));
Out = Model(X);
Lossvalue = Loss(Y,Out);
print("Loss:",Lossvalue.numpy(),"(Last)");
Xyz += [[Model.W.numpy()[0,0],Model.W.numpy()[1,0],Lossvalue.numpy()]];
print("\nWeights of optimum:");
W = tf.keras.backend.flatten(Model.W).numpy();
print(W);
#'''
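#optional sanity check (not in the original code): compare the tape gradient
#of the loss w.r.t. W[0,0] against a central finite difference; a sketch,
#the names T2, Eps, Wp, Wm are illustrative:
with tf.GradientTape() as T2:
  Lv = Loss(Y, Model(X));
G = T2.gradient(Lv, Model.W).numpy();
Eps = 1e-3;
Wp = Model.W.numpy().copy(); Wp[0,0] += Eps;
Wm = Model.W.numpy().copy(); Wm[0,0] -= Eps;
Lp = Loss(Y, tf.sigmoid(tf.matmul(X,Wp) + Model.B)).numpy();
Lm = Loss(Y, tf.sigmoid(tf.matmul(X,Wm) + Model.B)).numpy();
print("Tape grad:",G[0,0],"Finite diff:",(Lp-Lm)/(2*Eps)); #should roughly agree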
#loss landscape
D = 50;
P = np.linspace(-10,10, D); #grid coordinates along each weight axis
W1 = [];
W2 = [];
for I in range(D):
  W1 += [[]];
  W2 += [[]];
  for J in range(D):
    W1[I] += [P[I]];
    W2[I] += [P[J]];
print("\nW1",W1);
print("W2",W2);
Z = [];
for I in range(D):
Zrow = [];
for J in range(D):
Model.W = tf.convert_to_tensor([[P[I]],[P[J]]], tf.float32);
Out = Model.ff(X);
Lossval = Loss(Y,Out).numpy();
Zrow += [Lossval];
Z += [Zrow];
print("Z:",Z);
Z = np.array(Z);
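#side note (not in the original code): the double loop above can be replaced
#by a vectorized computation with np.meshgrid; a sketch, assuming X, Y, P, D
#and Model.B from above (Zvec should match Z up to float noise):
Xn = X.numpy(); Yn = Y.numpy();
W1g,W2g = np.meshgrid(P,P, indexing="ij"); #[D,D] grids, W1g[i,j]=P[i], W2g[i,j]=P[j]
Logits = W1g[...,None]*Xn[:,0] + W2g[...,None]*Xn[:,1] + Model.B.numpy(); #[D,D,4]
Probs = 1/(1 + np.exp(-Logits)); #sigmoid
Zvec = np.abs(Probs - Yn.reshape(1,1,4)).mean(axis=-1); #MAE per grid cell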
pp.figure(figsize=(8,8));
pp3d = pp.axes(projection="3d",elev=10,azim=10);
pp3d.text(0,0,0,"(0,0,0)");
pp3d.text(W[0],W[1],0,"Optimum");
pp3d.plot([0,10],[0,0],"-r");
pp3d.plot([0,0],[0,10],"-g");
pp3d.plot([0,0],[0,0],[0,1],"-b");
pp3d.plot([W[0]],[W[1]],[0],"yo");
pp3d.set_title("Loss Landscape");
pp3d.set_xlabel("Weight1");
pp3d.set_ylabel("Weight2");
pp3d.set_zlabel("Loss");
pp3d.plot_wireframe(W1,W2,Z, cmap="coolwarm");
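#optional (not in the original code): a filled surface on the same axes;
#unlike plot_wireframe, plot_surface actually applies the cmap:
pp3d.plot_surface(np.array(W1), np.array(W2), Z, cmap="coolwarm", alpha=0.4);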
#gradient descent curve
W1s = [];
W2s = [];
Ls = [];
for I in range(len(Xyz)):
  W1s += [Xyz[I][0]];
  W2s += [Xyz[I][1]];
  Ls += [Xyz[I][2]];
pp3d.plot(W1s,W2s,Ls,"-ro");
#eof