The training data in the drawing below seem to need around 5 hidden layers to separate. When the hidden layers use ReLU, however, the output never changes during training, most likely because of dead ReLU units (possibly made worse by exploding gradients). The problem here is dead ReLU, not the vanishing-gradient problem of sigmoid-like activations.
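Side note, not from the original post: a ReLU unit is "dead" when its pre-activation is negative for every training input, so its output and its gradient are both zero, and gradient descent can never revive it. A minimal NumPy sketch of this, with made-up weights and a deliberately large negative bias for illustration:

import numpy as np;

def relu(Z):
  return np.maximum(0.0, Z);
#end def

#hypothetical single ReLU unit with a large negative bias
W = np.array([0.4, -0.3]);
B = -5.0;
X = np.random.uniform(0.0, 1.0, size=(14,2));  #inputs in [0,1], like the normalised data

Z = X@W + B;            #pre-activation is negative for every sample
print(relu(Z));         #all zeros -> the unit is "dead"
print((Z>0).mean());    #0.0 -> ReLU gradient is 0 for every sample, so W and B never update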
Training data & class distribution:
Source code:
#libs
import tensorflow as tf;
import matplotlib.pyplot as pyplot;

#mockup to emphasize value name
def units(Num):
  return Num;
#end def

#PROGRAMME ENTRY POINT==========================================================
#data
#https://i.imgur.com/uVOxZR7.png
X = [[1,1],[1,2],[1,3],[2,1],[2,2],[2,3],[3,1],[3,2],[3,3],[4,1],[4,2],[4,3],[5,1],[6,1]];
Y = [[0],[1],[0],[1],[0],[1],[0],[2],[1],[1],[1],[0],[0],[1]];
Max_X      = 6;
Max_Y      = 2;
Batch_Size = 14;

#normalise
for I in range(len(X)):
  X[I][0] /= Max_X;
  X[I][1] /= Max_X;
  Y[I][0] /= Max_Y;
#end for

#model
Input    = tf.placeholder(dtype=tf.float32, shape=[Batch_Size,2]);
Expected = tf.placeholder(dtype=tf.float32, shape=[Batch_Size,1]);

#RELU DOESN'T WORK, DEAD RELU? SIGMOID WORKS BUT SLOW.
#1
Weight1 = tf.Variable(tf.random_uniform(shape=[2,units(60)], minval=-1, maxval=1));
Bias1   = tf.Variable(tf.random_uniform(shape=[  units(60)], minval=-1, maxval=1));
Hidden1 = tf.sigmoid(tf.matmul(Input,Weight1) + Bias1);

#2
Weight2 = tf.Variable(tf.random_uniform(shape=[60,units(50)], minval=-1, maxval=1));
Bias2   = tf.Variable(tf.random_uniform(shape=[   units(50)], minval=-1, maxval=1));
Hidden2 = tf.sigmoid(tf.matmul(Hidden1,Weight2) + Bias2);

#3
Weight3 = tf.Variable(tf.random_uniform(shape=[50,units(40)], minval=-1, maxval=1));
Bias3   = tf.Variable(tf.random_uniform(shape=[   units(40)], minval=-1, maxval=1));
Hidden3 = tf.sigmoid(tf.matmul(Hidden2,Weight3) + Bias3);

#4
Weight4 = tf.Variable(tf.random_uniform(shape=[40,units(30)], minval=-1, maxval=1));
Bias4   = tf.Variable(tf.random_uniform(shape=[   units(30)], minval=-1, maxval=1));
Hidden4 = tf.sigmoid(tf.matmul(Hidden3,Weight4) + Bias4);

#5
Weight5 = tf.Variable(tf.random_uniform(shape=[30,units(20)], minval=-1, maxval=1));
Bias5   = tf.Variable(tf.random_uniform(shape=[   units(20)], minval=-1, maxval=1));
Hidden5 = tf.sigmoid(tf.matmul(Hidden4,Weight5) + Bias5);

#out
Weight6 = tf.Variable(tf.random_uniform(shape=[20,units(1)], minval=-1, maxval=1));
Bias6   = tf.Variable(tf.random_uniform(shape=[   units(1)], minval=-1, maxval=1));
Output  = tf.sigmoid(tf.matmul(Hidden5,Weight6) + Bias6);

Loss      = tf.reduce_sum(tf.square(Expected-Output));
Optimiser = tf.train.GradientDescentOptimizer(1e-1);
Training  = Optimiser.minimize(Loss);

#training
Sess = tf.Session();
Init = tf.global_variables_initializer();
Sess.run(Init);

Losses = [];
for I in range(20000):
  if (I%2000==0):
    Lossvalue = Sess.run(Loss, feed_dict={Input:X, Expected:Y});
    Losses   += [Lossvalue];
    if (I==0):
      print("Loss:",Lossvalue,"(first)");
    else:
      print("Loss:",Lossvalue);
  #end if
  Sess.run(Training, feed_dict={Input:X, Expected:Y});
#end for

Lastloss = Sess.run(Loss, feed_dict={Input:X, Expected:Y});
Losses  += [Lastloss];
print("Loss:",Lastloss,"(last)");

#eval
print("\nEval:");
Evalresults = Sess.run(Output, feed_dict={Input:X, Expected:Y}).tolist();
for I in range(len(Evalresults)):
  Evalresults[I] = [round(Evalresults[I][0]*Max_Y)];
#end for
print(Evalresults);
Sess.close();

#result: diagram
print("\nLoss curve:");
pyplot.plot(Losses);
#eof
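For comparison only (this is a possible fix I'm assuming, not something taken from the code above): dead ReLU is commonly mitigated by switching to a leaky ReLU and using smaller, fan-in-scaled initial weights with zero biases, so fewer units start out, or drift into, the always-negative regime. A rough sketch of what the first hidden layer could look like in the same TF 1.x style; tf.nn.leaky_relu, tf.random_normal and the He-style scale are my substitutions:

import tensorflow as tf;

#He-style initialisation scale for a layer with Fan_In inputs (assumption, not from the post)
def he_scale(Fan_In):
  return (2.0/Fan_In)**0.5;
#end def

Input = tf.placeholder(dtype=tf.float32, shape=[14,2]);

#hidden layer 1 with leaky ReLU: negative pre-activations keep a small gradient (0.2),
#so a unit can recover instead of staying dead
Weight1 = tf.Variable(tf.random_normal(shape=[2,60], stddev=he_scale(2)));
Bias1   = tf.Variable(tf.zeros(shape=[60]));
Hidden1 = tf.nn.leaky_relu(tf.matmul(Input,Weight1) + Bias1, alpha=0.2);

The remaining hidden layers would be changed the same way; the output layer can stay sigmoid, since the expected values are normalised to [0, 1].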
Colab link:
https://colab.research.google.com/drive/1F8G1ug09IJo3-haZVV5lHgIP_wveQAzk