티스토리 뷰

 

 

 

 

Logistic Regression

In [1]:
from PIL import Image
# Open the illustration stored next to the notebook. Because this is the
# last expression in the cell, the returned image object becomes the cell output.
Image.open('Logistic Regression.png')
Out[1]:
 

Training Data

In [5]:
import tensorflow as tf

# Fix the graph-level seed so the random initial W and b, and therefore the
# training log printed below, are the same on every fresh run.
tf.set_random_seed(777)

# Toy training set: six samples with two features each and one binary label.
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

# Placeholders for tensors that are fed on every sess.run call.
# None leaves the batch dimension open.
X = tf.placeholder(tf.float32, shape=[None, 2])
Y = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable(tf.random_normal([2, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear score (logit) z = XW + b, shared by the hypothesis and the cost.
logits = tf.matmul(X, W) + b

# Hypothesis using sigmoid: tf.div(1., 1. + tf.exp(-(tf.matmul(X, W) + b)))
hypothesis = tf.sigmoid(logits)

# cost/loss function: mean binary cross-entropy,
#   -mean(Y * log(h) + (1 - Y) * log(1 - h)).
# It is computed from the logits instead of from tf.log(hypothesis): once the
# sigmoid saturates to exactly 0 or 1 in float32, log() yields -inf and the
# hand-written formula turns the cost into NaN.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))

# One plain gradient-descent update of W and b per run of `train`.
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# The comparison is True where hypothesis > 0.5 and False otherwise;
# casting to float32 maps True to 1.0 and False to 0.0.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
# tf.equal is True where the prediction matches Y; after the cast the mean
# of the resulting 1/0 values is the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize Tensorflow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # Fetch the cost and apply one training update in the same run.
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)

    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={X: x_data, Y: y_data})
    # NOTE: `c` holds the thresholded predictions, not the labels, although
    # it is printed under "Correct (Y)". The label strings are left as they
    # are so the printed output format does not change.
    print("\nHipothesis: ", h, "\nCorrect (Y): ", c, "\nAccuracy: ", a)
 
0 0.7144708
200 0.48005065
400 0.43862903
600 0.41316465
800 0.39457297
1000 0.37938392
1200 0.3661176
1400 0.35408387
1600 0.34293556
1800 0.33248472
2000 0.32262185
2200 0.3132771
2400 0.30440214
2600 0.29596052
2800 0.28792194
3000 0.28026098
3200 0.2729547
3400 0.26598227
3600 0.25932434
3800 0.2529629
4000 0.24688107
4200 0.24106318
4400 0.23549442
4600 0.23016088
4800 0.22504948
5000 0.22014801
5200 0.2154451
5400 0.21092981
5600 0.20659213
5800 0.20242256
6000 0.19841236
6200 0.19455309
6400 0.19083703
6600 0.18725683
6800 0.18380557
7000 0.18047689
7200 0.17726457
7400 0.17416298
7600 0.17116678
7800 0.16827087
8000 0.16547059
8200 0.16276155
8400 0.16013926
8600 0.15760003
8800 0.15513991
9000 0.1527555
9200 0.15044348
9400 0.14820063
9600 0.14602403
9800 0.14391075
10000 0.14185822

Hipothesis:  [[0.02759416]
 [0.15435112]
 [0.28944004]
 [0.7885205 ]
 [0.9439846 ]
 [0.98165876]] 
Correct (Y):  [[0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]] 
Accuracy:  1.0
 

Classifying diabetes

 

feature들과 target 값 (당뇨병이면 1, 아니면 0)

In [6]:
# Open the illustration for the diabetes data. `Image` is the name imported
# by `from PIL import Image` in the first cell, so that cell must run first.
Image.open('diabetes.png')
Out[6]:
In [ ]:
import numpy as np

# Fix the graph-level seed so the random initial W and b, and therefore the
# training log, are the same on every fresh run.
tf.set_random_seed(777)

# Load the whole CSV as float32. Every column except the last is a feature;
# the last column is the binary target.
xy = np.loadtxt('data-03-diabetes.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
# Indexing with the list [-1] keeps y_data two-dimensional, matching the
# [None, 1] shape of the Y placeholder.
y_data = xy[:, [-1]]

# Take the feature count from the data (8 for this file) so the same cell
# also works for CSV files with a different number of feature columns.
n_features = x_data.shape[1]

# Placeholders for tensors that are fed on every sess.run call.
# None leaves the batch dimension open.
X = tf.placeholder(tf.float32, shape=[None, n_features])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([n_features, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear score (logit) z = XW + b, shared by the hypothesis and the cost.
logits = tf.matmul(X, W) + b

# Hypothesis using sigmoid: tf.div(1., 1. + tf.exp(-(tf.matmul(X, W) + b)))
hypothesis = tf.sigmoid(logits)

# cost/loss function: mean binary cross-entropy,
#   -mean(Y * log(h) + (1 - Y) * log(1 - h)).
# It is computed from the logits instead of from tf.log(hypothesis): once the
# sigmoid saturates to exactly 0 or 1 in float32, log() yields -inf and the
# hand-written formula turns the cost into NaN.
cost = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# The comparison is True where hypothesis > 0.5 and False otherwise;
# casting to float32 maps True to 1.0 and False to 0.0.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
# Mean of the 1/0 "prediction equals label" values = fraction correct.
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The full dataset is fed on every step (batch gradient descent).
    feed = {X: x_data, Y: y_data}
    for step in range(10001):
        sess.run(train, feed_dict=feed)
        if step % 200 == 0:
            # Evaluated in a separate run, i.e. after this step's update.
            print(step, sess.run(cost, feed_dict=feed))

    # Accuracy report
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=feed)
    # NOTE: `c` holds the thresholded predictions, not the labels, although
    # it is printed under "Correct (Y)". The label strings are left as they
    # are so the printed output format does not change.
    print("\nHypothesis: ", h, "\nCorrect (Y): ", c, "\nAccuracy: ", a)
In [ ]:
# 결과 값 일부만 출력

0   1.3086263
200 0.8495514
400 0.7393282
600 0.69743496
800 0.6690898

...

9200 0.47940743
9400 0.4790678
9600 0.47874758
9800 0.4784453
10000 0.4781597

Hypothesis:  
[[0.40194005]
 [0.93241286]
 [0.24879721]
 [0.940664  ]
 [0.10599464]
...
 [0.69642776]
 [0.7324742 ]
 [0.8581585 ]
 [0.6740077 ]
 [0.9035657 ]] 

Correct (Y):  
[[0.]
 [1.]
 [0.]
 [1.]
 [0.]
...
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]] 
Accuracy:  0.7720685
 

Exercise

  • CSV reading using tf.decode_csv
  • Try other classification data from Kaggle
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2025/04   »
1 2 3 4 5
6 7 8 9 10 11 12
13 14 15 16 17 18 19
20 21 22 23 24 25 26
27 28 29 30
글 보관함