Training/Test dataset, learning rate, normalization

 

Split the data we have into a training set and a test set.
The training set is used only for training; the test set is used only to evaluate the model.

In [3]:
import tensorflow as tf
In [1]:
x_data = [[1, 2, 1], [1, 3, 2], [1, 3, 4], [1, 5, 5], [1, 7, 5], [1, 2, 5], [1, 6, 6], [1, 7, 7]]
y_data = [[0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0], [1, 0, 0]]

# Evaluation our model using this test data set
x_test = [[2, 1, 1], [3, 1, 2], [3, 3, 4]]
y_test = [[0, 0, 1], [0, 0, 1], [0, 0, 1]]
 

How should we split the data?
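Above, the split is hard-coded by hand. More commonly you shuffle and split automatically, for example with scikit-learn (a sketch, not part of the original post; train_test_split and the variable names are assumptions):

import numpy as np
from sklearn.model_selection import train_test_split

# Shuffle the 8 samples and hold out roughly 30% of them for evaluation.
x_tr, x_te, y_tr, y_te = train_test_split(
    np.array(x_data), np.array(y_data), test_size=0.3, random_state=0)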

In [ ]:
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
# This is where placeholders come in handy: the same graph accepts either training or test data.
W = tf.Variable(tf.random_normal([3, 3]))
b = tf.Variable(tf.random_normal([3]))

hypothesis = tf.nn.softmax(tf.matmul(X, W)+b)
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
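# NB: tf.log(hypothesis) yields -inf when a softmax output reaches 0;
# tf.nn.softmax_cross_entropy_with_logits is the numerically safer form.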
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Correct prediction Test model
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    for step in range(201):
        cost_val, W_val, _ = sess.run([cost, W, optimizer],
                                     feed_dict={X: x_data, Y: y_data})
        print(step, cost_val, W_val)
    
    # predict
    print("Prediction: ", sess.run(prediction, feed_dict={X: x_test}))
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))
In [ ]:
# Only part of the output is shown
0 1.5397861 [[ 1.8925256   0.4287325   0.19238694]
 [-0.12909223 -1.0582068  -0.16315994]
 [-0.6142031   0.27412933 -0.549119  ]]
1 1.3190705 [[ 1.8688099   0.434021    0.21081401]
 [-0.19533941 -1.0006969  -0.15442257]
 [-0.65575755  0.3108505  -0.5442857 ]]
2 1.2793169 [[ 1.862095    0.42610234  0.22544764]
 [-0.17166497 -1.0116303  -0.16716364]
 [-0.61217403  0.27912587 -0.5561446 ]]
3 1.257182 [[ 1.8466132   0.42476133  0.2422705 ]
 [-0.19532683 -0.9885116  -0.16662043]
 [-0.6135181   0.28167254 -0.5573472 ]]
4 1.2420418 [[ 1.8358377   0.4197939   0.25801334]
 [-0.19435279 -0.9845198  -0.17158636]
 [-0.5919764   0.26549304 -0.5627094 ]]

...

196 0.618799 [[ 0.517981    0.14858338  1.8470815 ]
 [-0.3144764  -0.5116923  -0.5242901 ]
 [ 0.05233563 -0.05526878 -0.88625944]]
197 0.6179559 [[ 0.51356554  0.14850758  1.8515728 ]
 [-0.31420463 -0.5113062  -0.5249479 ]
 [ 0.05380786 -0.05542909 -0.88757133]]
198 0.6171181 [[ 0.5091612   0.1484376   1.856047  ]
 [-0.31393614 -0.510923   -0.5255996 ]
 [ 0.05527845 -0.05558917 -0.88888186]]
199 0.61628544 [[ 0.5047679   0.14837337  1.8605046 ]
 [-0.3136707  -0.51054275 -0.5262453 ]
 [ 0.05674759 -0.05574922 -0.89019096]]
200 0.6154579 [[ 0.5003855   0.14831482  1.8649455 ]
 [-0.3134084  -0.51016533 -0.52688503]
 [ 0.05821515 -0.05590914 -0.8914986 ]]
Prediction:  [2 2 2]
Accuracy:  1.0
 

Learning rate: NaN!
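How much the step size matters is easy to see on a toy problem: plain gradient descent on f(w) = w^2, whose gradient is 2w (a minimal sketch, illustration only, not from the original post):

# Gradient descent on f(w) = w^2 with three step sizes.
for lr in (0.01, 0.1, 1.5):
    w = 1.0
    for _ in range(20):
        w -= lr * 2 * w        # w <- w - lr * f'(w)
    print(lr, w)
# lr=0.01: w shrinks only 2% per step and creeps toward 0;
# lr=0.1: converges quickly; lr=1.5: |w| doubles every step and diverges.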

In [6]:
from PIL import Image
Image.open('rate.png')
Out[6]: [image: rate.png — illustration of learning rate effects]

Let's change the learning rate to 1.5.

In [ ]:
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
# This is where placeholders come in handy: the same graph accepts either training or test data.
W = tf.Variable(tf.random_normal([3, 3]))
b = tf.Variable(tf.random_normal([3]))

hypothesis = tf.nn.softmax(tf.matmul(X, W)+b)
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.5).minimize(cost)

# Correct prediction Test model
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    for step in range(201):
        cost_val, W_val, _ = sess.run([cost, W, optimizer],
                                     feed_dict={X: x_data, Y: y_data})
        print(step, cost_val, W_val)
    
    # predict
    print("Prediction: ", sess.run(prediction, feed_dict={X: x_test}))
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))
In [ ]:
0 1.6634662 [[-2.2075746   0.5806351   0.05093762]
 [-2.1975489   1.8915738   1.0907044 ]
 [-1.8790697   2.740889    1.4753516 ]]
1 16.181648 [[-1.8325746  -0.29573685  0.5523095 ]
 [ 0.23995113 -1.9142226   2.459001  ]
 [ 0.5584303  -0.9352894   2.7140303 ]]
2 24.668037 [[-1.4575759   0.26676315 -0.38518918]
 [ 2.6774485   0.7107774  -2.603496  ]
 [ 2.9959288   1.8772106  -2.5359683 ]]
3 14.287976 [[-2.5791383   0.8258258   0.17731059]
 [-1.4405096   3.3287358  -1.1034966 ]
 [-1.1252847   4.6859245  -1.2234685 ]]
4 27.268196 [[-2.2041383  -0.11167383  0.7398102 ]
 [ 0.99699044 -0.60876346  0.39650273]
 [ 1.3122153   0.935925    0.0890311 ]]
5 3.042033 [[-2.821877    0.44410133  0.80177385]
 [-1.8019562   2.0010686   0.58561754]
 [-1.594873    3.7193468   0.21269768]]
6 18.955597 [[-2.446877   -0.4811626   1.3520378 ]
 [ 0.6355438  -1.9118645   2.061051  ]
 [ 0.84262705 -0.01832223  1.5128667 ]]
7 17.738667 [[-2.071879    0.08133739  0.4145398 ]
 [ 3.0730395   0.7131355  -3.0014448 ]
 [ 3.2801242   2.7941778  -3.7371304 ]]
8 14.503769 [[-3.188974    0.6359326   0.97703964]
 [-1.0356164   3.3217914  -1.5014452 ]
 [-0.8323531   5.5941553  -2.4246306 ]]
9 29.303299 [[-2.8139739e+00 -3.0156720e-01  1.5395395e+00]
 [ 1.4018836e+00 -6.1570811e-01 -1.4455318e-03]
 [ 1.6051469e+00  1.8441553e+00 -1.1121309e+00]]
10 3.051435 [[-3.5816474   0.15776405  1.8478816 ]
 [-1.9151199   1.7820475   0.9178026 ]
 [-1.712048    4.176757   -0.12753755]]
11 19.200672 [[-3.2066474  -0.6706238   2.3012693 ]
 [ 0.5223801  -1.9357989   2.198149  ]
 [ 0.72545195  0.5372989   1.0744205 ]]
12 17.749727 [[-2.8316474  -0.10812378  1.3637694 ]
 [ 2.9598799   0.6892011  -2.864351  ]
 [ 3.162952    3.349799   -4.1755795 ]]
13 11.495592 [[-3.8370576   0.33479255  1.9262632 ]
 [-0.9112296   3.0603235  -1.3643636 ]
 [-0.54493713  5.7451944  -2.8630857 ]]
14 27.80763 [[-3.4620576  -0.6027051   2.488761  ]
 [ 1.5262704  -0.87717175  0.13563192]
 [ 1.8925629   1.9951966  -1.5505881 ]]
15 3.5476327 [[-4.1945796  -0.09651816  2.715096  ]
 [-1.6582363   1.6287386   0.81422836]
 [-1.4724057   4.534624   -0.72504675]]
16 18.976534 [[-3.8195796  -0.8735096   3.1170874 ]
 [ 0.77926373 -1.9854422   1.9909091 ]
 [ 0.9650943   0.9474349   0.42464244]]
17 14.765488 [[-3.4445798  -0.3110096   2.1795876 ]
 [ 3.2167633   0.63955784 -3.0715904 ]
 [ 3.4025934   3.759935   -4.8253565 ]]
18 nan [[nan nan nan]
 [nan nan nan]
 [nan nan nan]]

...

199 nan [[nan nan nan]
 [nan nan nan]
 [nan nan nan]]
200 nan [[nan nan nan]
 [nan nan nan]
 [nan nan nan]]
Prediction:  [0 0 0]
Accuracy:  0.0
 

With a learning rate of 1.5 every step overshoots: the cost bounces around and, once a saturated softmax output makes tf.log(hypothesis) hit log(0), everything turns to NaN. Now let's try the opposite extreme and change the learning rate to 1e-10.

In [ ]:
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
# This is where placeholders come in handy: the same graph accepts either training or test data.
W = tf.Variable(tf.random_normal([3, 3]))
b = tf.Variable(tf.random_normal([3]))

hypothesis = tf.nn.softmax(tf.matmul(X, W)+b)
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-10).minimize(cost)

# Correct prediction Test model
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    for step in range(201):
        cost_val, W_val, _ = sess.run([cost, W, optimizer],
                                     feed_dict={X: x_data, Y: y_data})
        print(step, cost_val, W_val)
    
    # predict
    print("Prediction: ", sess.run(prediction, feed_dict={X: x_test}))
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, feed_dict={X: x_test, Y: y_test}))
In [ ]:
0 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
1 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
2 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
3 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
4 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]

...

196 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
197 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
198 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
199 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
200 3.3829603 [[ 1.0471715   1.2519882  -0.3824097 ]
 [ 0.5754172  -1.0587244   1.3790662 ]
 [-0.6138766   1.3712655  -0.12763448]]
Prediction:  [0 0 1]
Accuracy:  0.0
 

The cost stays exactly the same for all 200 steps: either we are stuck in a local minimum, or, as here, the learning rate is so small that no learning is taking place at all.
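Concretely: with a learning rate of 1e-10 and gradients of order 1, each update is about 1e-10, far below what float32 can resolve next to weights of order 1 (machine epsilon is about 1.2e-7), so the stored weights literally cannot change. A quick check (a sketch, assuming a gradient of typical size):

import numpy as np

w = np.float32(1.0471715)                     # a weight from the run above
update = np.float32(1e-10) * np.float32(1.0)  # learning_rate * gradient
print(w - update == w)                        # True: the step is lost to rounding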

 

NaN also occurs when the data is not normalized.

In [18]:
import numpy as np

xy = np.array([[828.659973, 833.450012, 908100, 828.349976, 831.659973],
               [823.02002, 828.070007, 1828100, 821.655029, 828.070007],
               [819.929993, 824.400024, 1438100, 818.97998, 824.159973],
               [816, 820.958984, 1008100, 815.48999, 819.23999],
               [819.359985, 823, 1188100, 818.469971, 818.97998],
               [819, 823, 1198100, 816, 820.450012],
               [811.700012, 815.25, 1098100, 809.780029, 813.669983],
               [809.51001, 816.659973, 1398100, 804.539978, 809.559998]])
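Note that the third column (presumably trading volume, around 10^6) is three orders of magnitude larger than the price columns (around 800), so its contribution dominates the squared-error gradients. A quick look at the per-column range (a sketch):

print(xy.min(axis=0))  # [   809.51    815.25  908100.      804.54    809.56]
print(xy.max(axis=0))  # [   828.66    833.45 1828100.      828.35    831.66]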
In [11]:
Image.open('nan.png')
Out[11]: [image: nan.png]
In [ ]:
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
#placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(2001):
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)
In [ ]:
# Output is long; only part of it is shown

0 Cost:  3728694800000.0 
Prediction:
 [[1362703.1]
 [2743147.5]
 [2157958. ]
 [1512750. ]
 [1782836.4]
 [1797843.4]
 [1647792.9]
 [2097943.8]]
1 Cost:  4.0966447e+27 
Prediction:
 [[-4.5148717e+13]
 [-9.0888999e+13]
 [-7.1499101e+13]
 [-5.0120490e+13]
 [-5.9069675e+13]
 [-5.9566851e+13]
 [-5.4595083e+13]
 [-6.9510392e+13]]
2 Cost:  inf 
Prediction:
 [[1.4965152e+21]
 [3.0126385e+21]
 [2.3699340e+21]
 [1.6613112e+21]
 [1.9579440e+21]
 [1.9744235e+21]
 [1.8096275e+21]
 [2.3040157e+21]]
3 Cost:  inf 
Prediction:
 [[-4.9604021e+28]
 [-9.9857981e+28]
 [-7.8554668e+28]
 [-5.5066406e+28]
 [-6.4898703e+28]
 [-6.5444940e+28]
 [-5.9982555e+28]
 [-7.6369714e+28]]
4 Cost:  inf 
Prediction:
 [[1.6441923e+36]
 [3.3099276e+36]
 [2.6038008e+36]
 [1.8252505e+36]
 [2.1511552e+36]
 [2.1692611e+36]
 [1.9882028e+36]
 [2.5313773e+36]]
5 Cost:  inf 
Prediction:
 [[-inf]
 [-inf]
 [-inf]
 [-inf]
 [-inf]
 [-inf]
 [-inf]
 [-inf]]
6 Cost:  nan 
Prediction:
 [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
7 Cost:  nan 
Prediction:
 [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]

...

1999 Cost:  nan 
Prediction:
 [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
2000 Cost:  nan 
Prediction:
 [[nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]
 [nan]]
 

Normalized inputs (min-max scale)

 

Normalize so that the maximum becomes 1 and the minimum becomes 0 (min-max scaling): x' = (x - min) / (max - min), applied per column.
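By hand with numpy this is simply (a sketch, using the raw xy array defined above):

mins, maxs = xy.min(axis=0), xy.max(axis=0)
xy_scaled = (xy - mins) / (maxs - mins)  # every column now spans [0, 1]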

In [19]:
from sklearn.preprocessing import MinMaxScaler

xy = MinMaxScaler().fit_transform(xy)
print(xy)
 
[[1.         1.         0.         1.         1.        ]
 [0.70548491 0.70439552 1.         0.71881783 0.83755792]
 [0.54412549 0.50274824 0.57608696 0.60646801 0.6606331 ]
 [0.33890353 0.31368023 0.10869565 0.45989134 0.43800918]
 [0.51436    0.4258239  0.30434783 0.58504805 0.42624401]
 [0.49556179 0.4258239  0.31521739 0.48131134 0.49276137]
 [0.11436064 0.         0.20652174 0.22007776 0.18597238]
 [0.         0.07747099 0.5326087  0.         0.        ]]
 

The difference between fit and fit_transform:
fit only computes and stores the statistics needed for normalization (here the per-column min and max) from the xy data; the values are actually normalized when transform is called. fit_transform simply performs fit and transform in one step.
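This distinction matters once there are separate training and test sets: fit the scaler on the training data only and transform both sets with those stored statistics, so no test information leaks into the scaling. A sketch (x_train and x_test are hypothetical arrays, not from the post):

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(x_train)                    # learn per-column min/max from training data only
x_train_s = scaler.transform(x_train)  # scale the training data
x_test_s = scaler.transform(x_test)    # scale the test data with the *training* statistics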

In [14]:
Image.open('gra.png')
Out[14]: [image: gra.png]
In [ ]:
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
#placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(2001):
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)
In [ ]:
# Only part of the output is shown

0 Cost:  1.6898079 
Prediction:
 [[ 0.24768381]
 [-1.7545954 ]
 [-0.99748063]
 [-0.17950694]
 [-0.47923422]
 [-0.4581023 ]
 [-0.39909208]
 [-1.0179515 ]]
1 Cost:  1.6896998 
Prediction:
 [[ 0.24774197]
 [-1.7545354 ]
 [-0.99743134]
 [-0.17946965]
 [-0.47918963]
 [-0.45805913]
 [-0.39906278]
 [-1.0179216 ]]
2 Cost:  1.6895914 
Prediction:
 [[ 0.2478002 ]
 [-1.7544754 ]
 [-0.9973821 ]
 [-0.17943233]
 [-0.47914502]
 [-0.45801595]
 [-0.39903346]
 [-1.0178913 ]]
3 Cost:  1.689483 
Prediction:
 [[ 0.24785842]
 [-1.7544153 ]
 [-0.9973327 ]
 [-0.179395  ]
 [-0.4791005 ]
 [-0.4579728 ]
 [-0.39900413]
 [-1.0178614 ]]
4 Cost:  1.6893747 
Prediction:
 [[ 0.24791664]
 [-1.7543553 ]
 [-0.9972833 ]
 [-0.17935775]
 [-0.4790559 ]
 [-0.45792964]
 [-0.3989748 ]
 [-1.0178314 ]]

...

1996 Cost:  1.4891863 
Prediction:
 [[ 0.35935774]
 [-1.6391464 ]
 [-0.90267456]
 [-0.10790836]
 [-0.39365906]
 [-0.37519258]
 [-0.3427595 ]
 [-0.9601586 ]]
1997 Cost:  1.4890933 
Prediction:
 [[ 0.35941154]
 [-1.6390909 ]
 [-0.9026288 ]
 [-0.1078739 ]
 [-0.39361775]
 [-0.37515262]
 [-0.3427323 ]
 [-0.96013075]]
1998 Cost:  1.4890002 
Prediction:
 [[ 0.35946524]
 [-1.639035  ]
 [-0.9025831 ]
 [-0.10783939]
 [-0.39357656]
 [-0.3751127 ]
 [-0.3427052 ]
 [-0.96010286]]
1999 Cost:  1.4889071 
Prediction:
 [[ 0.35951903]
 [-1.6389793 ]
 [-0.90253735]
 [-0.10780489]
 [-0.3935353 ]
 [-0.3750727 ]
 [-0.34267804]
 [-0.9600749 ]]
2000 Cost:  1.4888139 
Prediction:
 [[ 0.35957277]
 [-1.6389236 ]
 [-0.9024916 ]
 [-0.10777039]
 [-0.3934941 ]
 [-0.3750328 ]
 [-0.34265083]
 [-0.960047  ]]