Thursday, 12 September 2019

Case Study: Separating Heavily Mixed-up Ys, and the Dead ReLU Problem

Separation takes hard work when the class distribution on a plane, in space, or in hyperspace is mixed up almost as if randomised.

The training data in the drawing below need about 5 hidden layers to separate. When using ReLU, however, the output doesn't change during training because of dead ReLU nodes. The problem here is the dead ReLU problem, not the vanishing gradient problem of sigmoid-like activations.
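
To see the dead ReLU effect in isolation, here is a minimal sketch (my illustration, assuming TensorFlow 1.x like the source code below): once a unit's pre-activation goes negative, both its output and its gradient are zero, so gradient descent never updates the weights feeding it again.

import tensorflow as tf;

X = tf.constant([[1.0, 2.0]]);
W = tf.constant([[-1.0],[-1.0]]);   #weights pushing the pre-activation negative
B = tf.constant([-1.0]);
H = tf.nn.relu(tf.matmul(X,W) + B); #pre-activation = -4, so ReLU outputs 0
G = tf.gradients(H, [W,B]);         #gradients flowing through the dead unit

with tf.Session() as Sess:
  print(Sess.run(H)); #[[0.]]
  print(Sess.run(G)); #all zeros: no learning signal, the unit stays dead
#end with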

Training data & class distribution (drawing: https://i.imgur.com/uVOxZR7.png):
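
If the drawing is unavailable, a quick scatter plot of the same points (a small sketch using matplotlib, colouring points by class) reproduces the distribution:

import matplotlib.pyplot as pyplot;

X = [[1,1],[1,2],[1,3],[2,1],[2,2],[2,3],[3,1],[3,2],[3,3],[4,1],[4,2],[4,3],[5,1],[6,1]];
Y = [0,1,0,1,0,1,0,2,1,1,1,0,0,1];

#one point per sample, colour-coded by class label 0/1/2
pyplot.scatter([P[0] for P in X], [P[1] for P in X], c=Y);
pyplot.show();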

Source code:
#libs
import tensorflow        as tf;
import matplotlib.pyplot as pyplot;

#mockup to emphasize value name
def units(Num):
  return Num;
#end def

#PROGRAMME ENTRY POINT==========================================================
#data
#https://i.imgur.com/uVOxZR7.png
X = [[1,1],[1,2],[1,3],[2,1],[2,2],[2,3],[3,1],[3,2],[3,3],[4,1],[4,2],[4,3],[5,1],[6,1]];
Y = [[0],  [1],  [0],  [1],  [0],  [1],  [0],  [2],  [1],  [1],  [1],  [0],  [0],  [1]  ];
Max_X      = 6;
Max_Y      = 2;
Batch_Size = 14;

#normalise
for I in range(len(X)):
  X[I][0] /= Max_X;
  X[I][1] /= Max_X;
  Y[I][0] /= Max_Y;
#end for

#model
Input     = tf.placeholder(dtype=tf.float32, shape=[Batch_Size,2]);
Expected  = tf.placeholder(dtype=tf.float32, shape=[Batch_Size,1]);

#ReLU doesn't work here due to dead ReLU nodes; sigmoid works but converges slowly.
#1
Weight1   = tf.Variable(tf.random_uniform(shape=[2,units(60)], minval=-1, maxval=1));
Bias1     = tf.Variable(tf.random_uniform(shape=[  units(60)], minval=-1, maxval=1));
Hidden1   = tf.sigmoid(tf.matmul(Input,Weight1) + Bias1);

#2
Weight2   = tf.Variable(tf.random_uniform(shape=[60,units(50)], minval=-1, maxval=1));
Bias2     = tf.Variable(tf.random_uniform(shape=[   units(50)], minval=-1, maxval=1));
Hidden2   = tf.sigmoid(tf.matmul(Hidden1,Weight2) + Bias2);

#3
Weight3   = tf.Variable(tf.random_uniform(shape=[50,units(40)], minval=-1, maxval=1));
Bias3     = tf.Variable(tf.random_uniform(shape=[   units(40)], minval=-1, maxval=1));
Hidden3   = tf.sigmoid(tf.matmul(Hidden2,Weight3) + Bias3);

#4
Weight4   = tf.Variable(tf.random_uniform(shape=[40,units(30)], minval=-1, maxval=1));
Bias4     = tf.Variable(tf.random_uniform(shape=[   units(30)], minval=-1, maxval=1));
Hidden4   = tf.sigmoid(tf.matmul(Hidden3,Weight4) + Bias4);

#5
Weight5   = tf.Variable(tf.random_uniform(shape=[30,units(20)], minval=-1, maxval=1));
Bias5     = tf.Variable(tf.random_uniform(shape=[   units(20)], minval=-1, maxval=1));
Hidden5   = tf.sigmoid(tf.matmul(Hidden4,Weight5) + Bias5);

#out
Weight6   = tf.Variable(tf.random_uniform(shape=[20,units(1)], minval=-1, maxval=1));
Bias6     = tf.Variable(tf.random_uniform(shape=[   units(1)], minval=-1, maxval=1));
Output    = tf.sigmoid(tf.matmul(Hidden5,Weight6) + Bias6);

#loss: sum of squared errors over the batch
Loss      = tf.reduce_sum(tf.square(Expected-Output));
#plain gradient descent, learning rate 0.1
Optimiser = tf.train.GradientDescentOptimizer(1e-1);
Training  = Optimiser.minimize(Loss);

#training
Sess = tf.Session();
Init = tf.global_variables_initializer();
Sess.run(Init);

Losses = [];
for I in range(20000):
  if (I%2000==0):
    Lossvalue = Sess.run(Loss, feed_dict={Input:X, Expected:Y});
    Losses   += [Lossvalue];
    
    if (I==0):
      print("Loss:",Lossvalue,"(first)");
    else:
      print("Loss:",Lossvalue);
  #end if
  
  Sess.run(Training, feed_dict={Input:X, Expected:Y});
#end for

Lastloss = Sess.run(Loss, feed_dict={Input:X, Expected:Y});
Losses  += [Lastloss];
print("Loss:",Lastloss,"(last)");

#eval
print("\nEval:");
Evalresults = Sess.run(Output,feed_dict={Input:X, Expected:Y}).tolist();
for I in range(len(Evalresults)):
  Evalresults[I] = [round(Evalresults[I][0]*Max_Y)];
#end for
print(Evalresults);
Sess.close();

#result: diagram
print("\nLoss curve:");
pyplot.plot(Losses);
pyplot.show();
#eof
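
One common remedy for dead units (a hedged variant, not part of the original experiment) is an activation that keeps a small gradient for negative pre-activations, e.g. tf.nn.leaky_relu (TensorFlow 1.4+) instead of tf.sigmoid in the hidden layers; layer 1 would then read:

#hypothetical variant: leaky ReLU keeps a small slope (default alpha=0.2)
#for negative inputs, so a hidden unit always gets some gradient and can recover
Hidden1 = tf.nn.leaky_relu(tf.matmul(Input,Weight1) + Bias1);

Smaller initial weight magnitudes can also reduce the chance of units starting out dead.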

Colab link:
https://colab.research.google.com/drive/1F8G1ug09IJo3-haZVV5lHgIP_wveQAzk
