This case study concerns the problem of predicting a product's type from its full product name. A full product name is roughly 20 words long, so each sample lives in a high-dimensional space: the Y values are not separated by lines (2 axes) or planes (3 axes), but by hyper-planes.
The training data consist of ~20,000 samples across 100 classes (product types). Separating those 20k points in such a hyper-space looks hard.
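Before tackling that, it helps to see how a product name becomes a point in a hyper-space at all. The sketch below is my own illustration, not code from this case study; the feature size of 4096 and the name_to_features helper are hypothetical. It hashes each word of a name into a fixed-length bag-of-words vector:

import numpy as np

VOCAB_DIM = 4096  # hypothetical feature size, not from the original post

def name_to_features(name, dim=VOCAB_DIM):
    # Hashed bag-of-words: each word of the product name bumps one slot
    # of a fixed-size vector. Note: Python's built-in hash() is salted
    # per process; a real system would use a stable hash function.
    vec = np.zeros(dim, dtype=np.float32)
    for word in name.lower().split():
        vec[hash(word) % dim] += 1.0
    return vec

# A ~20-word product name becomes one point in a 4096-dimensional space;
# the classifier must then separate 100 such classes with hyper-planes.
x = name_to_features("acme stainless steel electric kettle 1.7 litre fast boil cordless")
print(x.shape, x.sum())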
Returning to the simple example above, the points can be separated with 4 hidden layers; see the source code below.
Source code:
#libs
import tensorflow as tf
import matplotlib.pyplot as pyplot

#data
#https://i.imgur.com/PUw7NWv.png
X = [[0,0], [0,1], [1,0], [10,10], [15,15], [20,20]]
Y = [[0],   [1],   [2],   [3],     [0],     [3]]
Batch_Size = 6
MAX = 20

#normalise: inputs to [0,1], labels to {0, 1/3, 2/3, 1}
for I in range(len(X)):
  X[I][0] = X[I][0]/MAX
  X[I][1] = X[I][1]/MAX
  Y[I][0] = Y[I][0]/3
#end for

#model: 4 hidden ReLU layers (40, 30, 20, 10 units) and a sigmoid output
Input    = tf.placeholder(dtype=tf.float32, shape=[Batch_Size,2])
Expected = tf.placeholder(dtype=tf.float32, shape=[Batch_Size,1])

Weight1 = tf.Variable(tf.random_uniform(shape=[2,40], minval=-1, maxval=1))
Bias1   = tf.Variable(tf.random_uniform(shape=[  40], minval=-1, maxval=1))
Hidden1 = tf.nn.relu(tf.matmul(Input,Weight1) + Bias1)

Weight2 = tf.Variable(tf.random_uniform(shape=[40,30], minval=-1, maxval=1))
Bias2   = tf.Variable(tf.random_uniform(shape=[   30], minval=-1, maxval=1))
Hidden2 = tf.nn.relu(tf.matmul(Hidden1,Weight2) + Bias2)

Weight3 = tf.Variable(tf.random_uniform(shape=[30,20], minval=-1, maxval=1))
Bias3   = tf.Variable(tf.random_uniform(shape=[   20], minval=-1, maxval=1))
Hidden3 = tf.nn.relu(tf.matmul(Hidden2,Weight3) + Bias3)

Weight4 = tf.Variable(tf.random_uniform(shape=[20,10], minval=-1, maxval=1))
Bias4   = tf.Variable(tf.random_uniform(shape=[   10], minval=-1, maxval=1))
Hidden4 = tf.nn.relu(tf.matmul(Hidden3,Weight4) + Bias4)

Weight5 = tf.Variable(tf.random_uniform(shape=[10,1], minval=-1, maxval=1))
Bias5   = tf.Variable(tf.random_uniform(shape=[   1], minval=-1, maxval=1))
Output  = tf.sigmoid(tf.matmul(Hidden4,Weight5) + Bias5)

Loss      = tf.reduce_sum(tf.square(Expected-Output))
Optimiser = tf.train.GradientDescentOptimizer(1e-1)
Training  = Optimiser.minimize(Loss)

#train: 20000 full-batch gradient steps, logging the loss every 2000
Sess = tf.Session()
Init = tf.global_variables_initializer()
Sess.run(Init)

Losses = []
for I in range(20000):
  if I%2000==0:
    Lossvalue = Sess.run(Loss, feed_dict={Input:X, Expected:Y})
    Losses += [Lossvalue]
    print("Loss:",round(Lossvalue,18))
  #end if
  Sess.run(Training, feed_dict={Input:X, Expected:Y})
#end for

#result: final loss
Lastloss = Sess.run(Loss, feed_dict={Input:X, Expected:Y})
Losses += [Lastloss]
print("Loss:",Lastloss,"(Last)")

#result: eval, expected outputs are roughly 0, 0.33, 0.66, 1, 0, 1
Evalresult = Sess.run(Output, feed_dict={Input:X, Expected:Y})
for I in range(Batch_Size):
  Evalresult[I][0] = round(Evalresult[I][0],18)
print("Eval (0 0.33 0.66 1 0 1):\n"+str(Evalresult))

#result: loss curve diagram
print("Loss curve:")
pyplot.plot(Losses)
pyplot.show()
#eof
Colab link:
https://colab.research.google.com/drive/11n78tFnKPgLpPsi6Ff3NgZUM0UYunWy7
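The code above uses the TensorFlow 1.x API (tf.placeholder, tf.Session). For readers on TensorFlow 2, here is a rough translation of the same 4-hidden-layer network in Keras; this is my own sketch, not code from the original post. Note that Keras' "mse" loss averages over the batch while the original sums the squared errors, so the effective step size differs by a factor of the batch size:

import numpy as np
import tensorflow as tf

# same toy data, pre-normalised
X = np.array([[0,0],[0,1],[1,0],[10,10],[15,15],[20,20]], dtype=np.float32) / 20.0
Y = np.array([[0],[1],[2],[3],[0],[3]], dtype=np.float32) / 3.0

# 4 hidden ReLU layers (40, 30, 20, 10 units) and a sigmoid output
model = tf.keras.Sequential([
    tf.keras.Input(shape=(2,)),
    tf.keras.layers.Dense(40, activation="relu"),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(20, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1,  activation="sigmoid"),
])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.1), loss="mse")

# 20000 full-batch steps, matching the original training loop
model.fit(X, Y, batch_size=6, epochs=20000, verbose=0)
print(model.predict(X))  # expected roughly 0, 0.33, 0.66, 1, 0, 1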