Source code:
%tensorflow_version 2.x %reset -f #libs import tensorflow as tf; import numpy as np; import matplotlib.pyplot as pp; from mpl_toolkits import mplot3d; #constants BSIZE = 4; #model class model(tf.Module): def __init__(this): super().__init__(); this.W = tf.Variable(tf.random.uniform([2,1], -1,1)); this.B = tf.Variable(tf.random.uniform([ 1], -1,1)); @tf.function(input_signature=[tf.TensorSpec([BSIZE,2])]) def __call__(this,Inp): return tf.sigmoid(tf.matmul(Inp,this.W) + this.B); def ff(this,Inp): Out = tf.sigmoid(tf.matmul(Inp,this.W) + this.B); return Out; #data X = tf.convert_to_tensor([[0,0],[0,1],[1,0],[1,1]], tf.float32); Y = tf.convert_to_tensor([[0], [1], [1], [1] ], tf.float32); #train Model = model(); Loss = tf.losses.MeanAbsoluteError(); Optim = tf.optimizers.SGD(1e-1); Steps = 5000; Xyz = []; #''' for I in range(Steps): if I%(Steps/10)==0: Out = Model(X); Lossvalue = Loss(Y,Out); print("Loss:",Lossvalue.numpy()); Xyz += [[Model.W.numpy()[0],Model.W.numpy()[1],Lossvalue.numpy()]]; with tf.GradientTape() as T: Out = Model(X); Lossvalue = Loss(Y,Out); Grads = T.gradient(Lossvalue, Model.trainable_variables); Optim.apply_gradients(zip(Grads, Model.trainable_variables)); Out = Model(X); Lossvalue = Loss(Y,Out); print("Loss:",Lossvalue.numpy(),"(Last)"); Xyz += [[Model.W.numpy()[0],Model.W.numpy()[1],Lossvalue.numpy()]]; print("\nWeights of optimum:"); W = tf.keras.backend.flatten(Model.W).numpy(); print(W); #''' #loss landscape D = 50; P = np.linspace(-10,10, D); #marker points W1 = []; W2 = []; for I in range(D): W1 += [[]]; W2 += [[]]; for J in range(D): W1[I] += [P[I]]; W2[I] += [P[J]]; print("\nW1",W1); print("W2",W2); Z = []; for I in range(D): Zrow = []; for J in range(D): Model.W = tf.convert_to_tensor([[P[I]],[P[J]]], tf.float32); Out = Model.ff(X); Lossval = Loss(Y,Out).numpy(); Zrow += [Lossval]; Z += [Zrow]; print("Z:",Z); Z = np.array(Z); pp.figure(figsize=(8,8)); pp3d = pp.axes(projection="3d",elev=10,azim=10); pp3d.text(0,0,0,"(0,0,0)"); pp3d.text(W[0],W[1],0,"Optimum"); pp3d.plot([0,10],[0,0],"-r"); pp3d.plot([0,0],[0,10],"-g"); pp3d.plot([0,0],[0,0],[0,1],"-b"); pp3d.plot([W[0]],[W[1]],[0],"yo"); pp3d.set_title("Loss Landscape"); pp3d.set_xlabel("Weight1"); pp3d.set_ylabel("Weight2"); pp3d.set_zlabel("Loss"); pp3d.plot_wireframe(W1,W2,Z, cmap="coolwarm"); #gradient descent curve W1s = []; W2s = []; Ls = []; for I in range(len(Xyz)): W1s += [Xyz[I][0]]; W2s += [Xyz[I][1]]; Ls += [Xyz[I][2]]; pp3d.plot(W1s,W2s,Ls,"-ro"); #eof
No comments:
Post a Comment