NumPy를 활용한 Iris 데이터 분석

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read only the first line of the CSV to recover the column names.
# The context manager closes the file handle as soon as the line is read.
with open('iris.csv') as f:
    s = f.readline()

# Split the header row on commas and drop the final field so that only the
# feature names remain.
# Variant for a header whose fields are wrapped in double quotes:
#header = [i.strip('"') for i in s.strip().split(',')][:-1]
header = s.strip().split(',')[:-1]
header

['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

# Species names; the position of a name in this list is its integer class code.
labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']


def _label_to_index(s):
    """Return the position in ``labels`` of the species name ``s``.

    ``np.loadtxt`` passes converters either ``bytes`` or ``str`` depending on
    the NumPy version and the ``encoding`` argument, so both are accepted.
    Raises ``ValueError`` if the name is not one of ``labels``.
    """
    if isinstance(s, bytes):
        s = s.decode()
    return labels.index(s)


# Skip the header row and load everything as floats; column 4 holds the
# species name, which the converter turns into its class code.
iris = np.loadtxt('iris.csv', delimiter=',', skiprows=1, converters={4: _label_to_index})

display(iris.shape, iris[:5])

(150, 5)

array([[5.1, 3.5, 1.4, 0.2, 0. ],
       [4.9, 3. , 1.4, 0.2, 0. ],
       [4.7, 3.2, 1.3, 0.2, 0. ],
       [4.6, 3.1, 1.5, 0.2, 0. ],
       [5. , 3.6, 1.4, 0.2, 0. ]])

# Separate the loaded table into the feature matrix (first four columns)
# and the label vector (fifth column).
X, y = iris[:, :4], iris[:, 4]

# Show both shapes, then the first five feature rows and the full label vector.
display(X.shape, y.shape)
display(X[:5], y)

(150, 4)

(150,)

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
       2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])

# Correlation coefficients between the feature columns of X.
# rowvar=False tells corrcoef that variables are in columns, which is the
# same as passing the transposed matrix.
corr = np.corrcoef(X, rowvar=False)
corr

array([[ 1.        , -0.10936925,  0.87175416,  0.81795363],
       [-0.10936925,  1.        , -0.4205161 , -0.35654409],
       [ 0.87175416, -0.4205161 ,  1.        ,  0.9627571 ],
       [ 0.81795363, -0.35654409,  0.9627571 ,  1.        ]])

# Draw the correlation matrix as an image with the colour scale fixed to [-1, 1].
plt.imshow(corr, cmap='spring', vmin=-1, vmax=1, interpolation='none')

# Label both axes with the feature names; the x labels are rotated by 90 degrees.
ticks = range(4)
plt.xticks(ticks, header, rotation=90)
plt.yticks(ticks, header)

plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x9facb662e8>

# One box per feature column of X.
plt.boxplot(X)
# Put the feature names at x positions 1..4, rotated by 90 degrees.
plt.xticks([1, 2, 3, 4], header, rotation=90)

([<matplotlib.axis.XTick at 0x9faca10be0>,
  <matplotlib.axis.XTick at 0x9faca10518>,
  <matplotlib.axis.XTick at 0x9faca10278>,
  <matplotlib.axis.XTick at 0x9facab5a58>],
 <a list of 4 Text xticklabel objects>)

plt.figure(figsize=[12, 8])

# One panel per feature in a 2x2 grid: feature value on x, class code on y.
for col in range(4):
    # Add small Gaussian noise (std 0.03) to the class codes so that points
    # sharing a class do not all fall on exactly the same horizontal line.
    jittered = y + np.random.normal(0, 0.03, size=len(y))

    plt.subplot(2, 2, col + 1)
    plt.scatter(X[:, col], jittered, c=y, s=30, alpha=0.3)
    plt.yticks([0, 1, 2], ['Setosa', 'Versicolor', 'Virginica'], rotation=90)
    plt.title(header[col], fontsize=15)

# Wrap the feature matrix in a DataFrame whose columns carry the feature names.
iris_df = pd.DataFrame(data=X, columns=header)
iris_df

# Grid of pairwise scatter plots of the features, with points coloured by class code.
pd.plotting.scatter_matrix(
    iris_df,
    figsize=[12, 12],
    alpha=0.8,
    c=y,
    s=60,
)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACCE4C50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACCC3208>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACBA3358>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACCB7828>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACBB0EB8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACBB0EF0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FAD834C18>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FAD86A2E8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACD83978>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACDB9048>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACDDF6D8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FACE69D68>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000009FAD2CB438>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FAD2F1AC8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FAD323198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000009FAD34D828>]],
      dtype=object)

# Print a summary of the frame: index range, per-column non-null counts and dtypes, memory usage.
iris_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
SepalLength    150 non-null float64
SepalWidth     150 non-null float64
PetalLength    150 non-null float64
PetalWidth     150 non-null float64
dtypes: float64(4)
memory usage: 4.8 KB

# Per-column summary statistics (count, mean, std, min, quartiles, max).
iris_df.describe()

# Overlay translucent histograms of all feature columns on one set of axes.
iris_df.plot.hist(alpha=0.3)

<matplotlib.axes._subplots.AxesSubplot at 0x9fae94e518>

# Compute 20 bins over the whole first column so that the three per-class
# histograms below share identical bin edges.
ct, bins = np.histogram(X[:, 0], bins=20)

# Overlay one translucent histogram per class code.
for cls in (0, 1, 2):
    plt.hist(X[y == cls, 0], bins=bins, alpha=0.3)

plt.legend(['Setosa', 'Versicolor', 'Virginica'])

<matplotlib.legend.Legend at 0x9faea4bef0>

# Display names of the classes, indexed by class code.
titles = ['Setosa', 'Versicolor', 'Virginica']
plt.figure(figsize=[12, 8])

# One panel per feature in a 2x2 grid, each with three overlaid class histograms.
for col in range(4):
    plt.subplot(2, 2, col + 1)
    plt.title(header[col], fontsize=15)

    # Bin edges come from the full column so all classes use the same bins.
    ct, bins = np.histogram(X[:, col], bins=20)
    for cls in (0, 1, 2):
        plt.hist(X[y == cls, col], bins=bins, alpha=0.3)

    # Same y range in every panel.
    plt.ylim(0, 40)

    # The legend is drawn once, on the first panel only.
    if col == 0:
        plt.legend(titles)

# Number of samples drawn from each group.
N = 30

# Draw each sample as a line across the four features. The rows are taken in
# three slices starting at 0, 50 and 100 (the printed label vector shows the
# classes stored as consecutive groups of 50), one colour per slice.
for start, line_style in ((0, 'r-'), (50, 'b-'), (100, 'g-')):
    plt.plot(X[start:start + N].T, line_style, alpha=0.3)
plt.xticks(range(4), header)

([<matplotlib.axis.XTick at 0x9faeee5b00>,
  <matplotlib.axis.XTick at 0x9faef88828>,
  <matplotlib.axis.XTick at 0x9faef4a7f0>,
  <matplotlib.axis.XTick at 0x9faea98ba8>],
 <a list of 4 Text xticklabel objects>)

# Feature rows grouped by class code: Xs[k] holds the rows whose label is k.
Xs = [X[y == k] for k in range(3)]

plt.figure(figsize=[8, 12])

# Three stacked panels, one per class; each draws one line per feature
# across that class's samples.
for i in range(3):
    plt.subplot(3, 1, i + 1)
    plt.plot(Xs[i])
    plt.title(titles[i], fontsize=15)
    plt.ylim(0, 10)

    # x-axis label only on the bottom panel, legend only on the top panel.
    if i == 2:
        plt.xlabel('samples')
    if i == 0:
        plt.legend(header)

# Indices of the two feature columns to plot against each other.
col1, col2 = 0, 1

# Scatter the two chosen features, coloured by class code, with a shortened colour bar.
x_values = X[:, col1]
y_values = X[:, col2]
plt.scatter(x_values, y_values, c=y, s=60)
plt.colorbar(shrink=0.5)

# Axis labels come from the matching feature names.
plt.xlabel(header[col1])
plt.ylabel(header[col2])

Text(0,0.5,'SepalWidth')

	SepalLength	SepalWidth	PetalLength	PetalWidth
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2
5	5.4	3.9	1.7	0.4
6	4.6	3.4	1.4	0.3
7	5.0	3.4	1.5	0.2
8	4.4	2.9	1.4	0.2
9	4.9	3.1	1.5	0.1
10	5.4	3.7	1.5	0.2
11	4.8	3.4	1.6	0.2
12	4.8	3.0	1.4	0.1
13	4.3	3.0	1.1	0.1
14	5.8	4.0	1.2	0.2
15	5.7	4.4	1.5	0.4
16	5.4	3.9	1.3	0.4
17	5.1	3.5	1.4	0.3
18	5.7	3.8	1.7	0.3
19	5.1	3.8	1.5	0.3
20	5.4	3.4	1.7	0.2
21	5.1	3.7	1.5	0.4
22	4.6	3.6	1.0	0.2
23	5.1	3.3	1.7	0.5
24	4.8	3.4	1.9	0.2
25	5.0	3.0	1.6	0.2
26	5.0	3.4	1.6	0.4
27	5.2	3.5	1.5	0.2
28	5.2	3.4	1.4	0.2
29	4.7	3.2	1.6	0.2
...	...	...	...	...
120	6.9	3.2	5.7	2.3
121	5.6	2.8	4.9	2.0
122	7.7	2.8	6.7	2.0
123	6.3	2.7	4.9	1.8
124	6.7	3.3	5.7	2.1
125	7.2	3.2	6.0	1.8
126	6.2	2.8	4.8	1.8
127	6.1	3.0	4.9	1.8
128	6.4	2.8	5.6	2.1
129	7.2	3.0	5.8	1.6
130	7.4	2.8	6.1	1.9
131	7.9	3.8	6.4	2.0
132	6.4	2.8	5.6	2.2
133	6.3	2.8	5.1	1.5
134	6.1	2.6	5.6	1.4
135	7.7	3.0	6.1	2.3
136	6.3	3.4	5.6	2.4
137	6.4	3.1	5.5	1.8
138	6.0	3.0	4.8	1.8
139	6.9	3.1	5.4	2.1
140	6.7	3.1	5.6	2.4
141	6.9	3.1	5.1	2.3
142	5.8	2.7	5.1	1.9
143	6.8	3.2	5.9	2.3
144	6.7	3.3	5.7	2.5
145	6.7	3.0	5.2	2.3
146	6.3	2.5	5.0	1.9
147	6.5	3.0	5.2	2.0
148	6.2	3.4	5.4	2.3
149	5.9	3.0	5.1	1.8

	SepalLength	SepalWidth	PetalLength	PetalWidth
count	150.000000	150.000000	150.000000	150.000000
mean	5.843333	3.054000	3.758667	1.198667
std	0.828066	0.433594	1.764420	0.763161
min	4.300000	2.000000	1.000000	0.100000
25%	5.100000	2.800000	1.600000	0.300000
50%	5.800000	3.000000	4.350000	1.300000
75%	6.400000	3.300000	5.100000	1.800000
max	7.900000	4.400000	6.900000	2.500000

머신러닝 기초_비용함수 (0)	2019.02.22
머신러닝 기초 _ 거리 (0)	2019.02.22
머신러닝과 파이썬 (0)	2019.02.21
Scikit-learn 기초 (0)	2019.02.20
머신러닝 기초 (0)	2019.02.20

조환희의 학습 블로그

티스토리 뷰

머신러닝 기초_iris활용

NumPy를 활용한 Iris 데이터 분석

'beginner > 파이썬 머신러닝 기초' 카테고리의 다른 글

티스토리툴바

	SepalLength	SepalWidth	PetalLength	PetalWidth
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2
5	5.4	3.9	1.7	0.4
6	4.6	3.4	1.4	0.3
7	5.0	3.4	1.5	0.2
8	4.4	2.9	1.4	0.2
9	4.9	3.1	1.5	0.1
10	5.4	3.7	1.5	0.2
11	4.8	3.4	1.6	0.2
12	4.8	3.0	1.4	0.1
13	4.3	3.0	1.1	0.1
14	5.8	4.0	1.2	0.2
15	5.7	4.4	1.5	0.4
16	5.4	3.9	1.3	0.4
17	5.1	3.5	1.4	0.3
18	5.7	3.8	1.7	0.3
19	5.1	3.8	1.5	0.3
20	5.4	3.4	1.7	0.2
21	5.1	3.7	1.5	0.4
22	4.6	3.6	1.0	0.2
23	5.1	3.3	1.7	0.5
24	4.8	3.4	1.9	0.2
25	5.0	3.0	1.6	0.2
26	5.0	3.4	1.6	0.4
27	5.2	3.5	1.5	0.2
28	5.2	3.4	1.4	0.2
29	4.7	3.2	1.6	0.2
...	...	...	...	...
120	6.9	3.2	5.7	2.3
121	5.6	2.8	4.9	2.0
122	7.7	2.8	6.7	2.0
123	6.3	2.7	4.9	1.8
124	6.7	3.3	5.7	2.1
125	7.2	3.2	6.0	1.8
126	6.2	2.8	4.8	1.8
127	6.1	3.0	4.9	1.8
128	6.4	2.8	5.6	2.1
129	7.2	3.0	5.8	1.6
130	7.4	2.8	6.1	1.9
131	7.9	3.8	6.4	2.0
132	6.4	2.8	5.6	2.2
133	6.3	2.8	5.1	1.5
134	6.1	2.6	5.6	1.4
135	7.7	3.0	6.1	2.3
136	6.3	3.4	5.6	2.4
137	6.4	3.1	5.5	1.8
138	6.0	3.0	4.8	1.8
139	6.9	3.1	5.4	2.1
140	6.7	3.1	5.6	2.4
141	6.9	3.1	5.1	2.3
142	5.8	2.7	5.1	1.9
143	6.8	3.2	5.9	2.3
144	6.7	3.3	5.7	2.5
145	6.7	3.0	5.2	2.3
146	6.3	2.5	5.0	1.9
147	6.5	3.0	5.2	2.0
148	6.2	3.4	5.4	2.3
149	5.9	3.0	5.1	1.8

« 2024/05 »
일	월	화	수	목	금	토
			1	2	3	4
5	6	7	8	9	10	11
12	13	14	15	16	17	18
19	20	21	22	23	24	25
26	27	28	29	30	31

	SepalLength	SepalWidth	PetalLength	PetalWidth
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2
5	5.4	3.9	1.7	0.4
6	4.6	3.4	1.4	0.3
7	5.0	3.4	1.5	0.2
8	4.4	2.9	1.4	0.2
9	4.9	3.1	1.5	0.1
10	5.4	3.7	1.5	0.2
11	4.8	3.4	1.6	0.2
12	4.8	3.0	1.4	0.1
13	4.3	3.0	1.1	0.1
14	5.8	4.0	1.2	0.2
15	5.7	4.4	1.5	0.4
16	5.4	3.9	1.3	0.4
17	5.1	3.5	1.4	0.3
18	5.7	3.8	1.7	0.3
19	5.1	3.8	1.5	0.3
20	5.4	3.4	1.7	0.2
21	5.1	3.7	1.5	0.4
22	4.6	3.6	1.0	0.2
23	5.1	3.3	1.7	0.5
24	4.8	3.4	1.9	0.2
25	5.0	3.0	1.6	0.2
26	5.0	3.4	1.6	0.4
27	5.2	3.5	1.5	0.2
28	5.2	3.4	1.4	0.2
29	4.7	3.2	1.6	0.2
...	...	...	...	...
120	6.9	3.2	5.7	2.3
121	5.6	2.8	4.9	2.0
122	7.7	2.8	6.7	2.0
123	6.3	2.7	4.9	1.8
124	6.7	3.3	5.7	2.1
125	7.2	3.2	6.0	1.8
126	6.2	2.8	4.8	1.8
127	6.1	3.0	4.9	1.8
128	6.4	2.8	5.6	2.1
129	7.2	3.0	5.8	1.6
130	7.4	2.8	6.1	1.9
131	7.9	3.8	6.4	2.0
132	6.4	2.8	5.6	2.2
133	6.3	2.8	5.1	1.5
134	6.1	2.6	5.6	1.4
135	7.7	3.0	6.1	2.3
136	6.3	3.4	5.6	2.4
137	6.4	3.1	5.5	1.8
138	6.0	3.0	4.8	1.8
139	6.9	3.1	5.4	2.1
140	6.7	3.1	5.6	2.4
141	6.9	3.1	5.1	2.3
142	5.8	2.7	5.1	1.9
143	6.8	3.2	5.9	2.3
144	6.7	3.3	5.7	2.5
145	6.7	3.0	5.2	2.3
146	6.3	2.5	5.0	1.9
147	6.5	3.0	5.2	2.0
148	6.2	3.4	5.4	2.3
149	5.9	3.0	5.1	1.8

	SepalLength	SepalWidth	PetalLength	PetalWidth
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2
5	5.4	3.9	1.7	0.4
6	4.6	3.4	1.4	0.3
7	5.0	3.4	1.5	0.2
8	4.4	2.9	1.4	0.2
9	4.9	3.1	1.5	0.1
10	5.4	3.7	1.5	0.2
11	4.8	3.4	1.6	0.2
12	4.8	3.0	1.4	0.1
13	4.3	3.0	1.1	0.1
14	5.8	4.0	1.2	0.2
15	5.7	4.4	1.5	0.4
16	5.4	3.9	1.3	0.4
17	5.1	3.5	1.4	0.3
18	5.7	3.8	1.7	0.3
19	5.1	3.8	1.5	0.3
20	5.4	3.4	1.7	0.2
21	5.1	3.7	1.5	0.4
22	4.6	3.6	1.0	0.2
23	5.1	3.3	1.7	0.5
24	4.8	3.4	1.9	0.2
25	5.0	3.0	1.6	0.2
26	5.0	3.4	1.6	0.4
27	5.2	3.5	1.5	0.2
28	5.2	3.4	1.4	0.2
29	4.7	3.2	1.6	0.2
...	...	...	...	...
120	6.9	3.2	5.7	2.3
121	5.6	2.8	4.9	2.0
122	7.7	2.8	6.7	2.0
123	6.3	2.7	4.9	1.8
124	6.7	3.3	5.7	2.1
125	7.2	3.2	6.0	1.8
126	6.2	2.8	4.8	1.8
127	6.1	3.0	4.9	1.8
128	6.4	2.8	5.6	2.1
129	7.2	3.0	5.8	1.6
130	7.4	2.8	6.1	1.9
131	7.9	3.8	6.4	2.0
132	6.4	2.8	5.6	2.2
133	6.3	2.8	5.1	1.5
134	6.1	2.6	5.6	1.4
135	7.7	3.0	6.1	2.3
136	6.3	3.4	5.6	2.4
137	6.4	3.1	5.5	1.8
138	6.0	3.0	4.8	1.8
139	6.9	3.1	5.4	2.1
140	6.7	3.1	5.6	2.4
141	6.9	3.1	5.1	2.3
142	5.8	2.7	5.1	1.9
143	6.8	3.2	5.9	2.3
144	6.7	3.3	5.7	2.5
145	6.7	3.0	5.2	2.3
146	6.3	2.5	5.0	1.9
147	6.5	3.0	5.2	2.0
148	6.2	3.4	5.4	2.3
149	5.9	3.0	5.1	1.8