티스토리 뷰
NumPy를 활용한 Iris 데이터 분석
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
# Read only the first line of the CSV to recover the column names.
# The context manager closes the file handle as soon as the line is read,
# instead of leaving an open handle for the garbage collector.
with open('iris.csv') as csv_file:
    s = csv_file.readline()
# Split the header line on commas and drop the last field, keeping the
# names of the remaining (feature) columns.
# NOTE: if the header fields are wrapped in double quotes, strip them per
# field with i.strip('"') before use.
header = s.strip().split(',')[:-1]
header
Out[2]:
In [3]:
# Class names; a name's position in this list is its integer class code.
labels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']


def _label_to_code(field):
    """Return the index in ``labels`` of the class name held in ``field``.

    ``np.loadtxt`` passes converters ``bytes`` under the legacy
    ``encoding='bytes'`` default and ``str`` otherwise (the default since
    NumPy 2.0), so both are accepted here.

    Raises:
        ValueError: if the name is not one of ``labels``.
    """
    text = field.decode() if isinstance(field, bytes) else field
    return labels.index(text)


# Skip the header row and load every row as floats; column 4 holds the class
# name, which the converter replaces with its numeric code.
iris = np.loadtxt('iris.csv', delimiter=',', skiprows=1, converters={4: _label_to_code})
In [4]:
# Sanity-check the load: overall array shape, then the first five rows.
display(iris.shape)
display(iris[:5])
In [5]:
# Split the loaded table: the first four columns are the features (X),
# column 4 is the numeric class code (y).
X, y = iris[:, :4], iris[:, 4]
In [6]:
# Show the shapes of both arrays, then the first five feature rows and the
# full vector of class codes.
display(X.shape, y.shape, X[:5], y)
In [7]:
# Pearson correlation between the feature columns. rowvar=False tells
# corrcoef that each column (not each row) is a variable.
corr = np.corrcoef(X, rowvar=False)
corr
Out[7]:
In [8]:
# Heatmap of the feature-correlation matrix. The colour scale is pinned to
# [-1, 1], the full range a correlation coefficient can take.
feature_ticks = list(range(4))
plt.imshow(corr, cmap='spring', vmin=-1, vmax=1, interpolation='none')
plt.yticks(feature_ticks, header)
plt.xticks(feature_ticks, header, rotation=90)
plt.colorbar()
Out[8]:
In [9]:
# One box per feature column. The tick labels go at positions 1..4 and are
# rotated so the feature names do not overlap.
plt.boxplot(X)
plt.xticks([1, 2, 3, 4], header, rotation=90)
Out[9]:
In [10]:
# 2x2 grid with one panel per feature: feature value on the x axis, class
# code on the y axis.
plt.figure(figsize=[12,8])
# Fixed seed so the jitter, and therefore the figure, is the same on every run.
jitter_rng = np.random.RandomState(0)
for col in range(4):
    plt.subplot(2,2,col+1)
    # Small Gaussian noise on y spreads apart points that would otherwise
    # sit on exactly the same horizontal line for their class.
    y_jittered = y + jitter_rng.normal(0, 0.03, size=len(y))
    plt.scatter(X[:,col], y_jittered, c=y, s=30, alpha=0.3)
    plt.yticks([0,1,2], ['Setosa', 'Versicolor', 'Virginica'], rotation=90)
    plt.title(header[col], fontsize=15)
In [11]:
# Wrap the feature matrix in a DataFrame so the columns carry the names
# read from the CSV header.
iris_df = pd.DataFrame(data=X, columns=header)
iris_df
Out[11]:
In [12]:
# Pairwise scatter plots of all features, with points coloured by class code.
pd.plotting.scatter_matrix(
    iris_df,
    figsize=[12,12],
    alpha=0.8,
    c=y,
    s=60,
)
Out[12]:
In [13]:
# Print a concise summary of the frame: index range, column names, dtypes,
# non-null counts and memory usage.
iris_df.info()
In [14]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for each
# feature column.
iris_df.describe()
Out[14]:
In [15]:
# Overlaid, semi-transparent histograms of every feature column on one axes.
iris_df.plot.hist(alpha=0.3)
Out[15]:
In [16]:
# Compute 20 bin edges from the whole first feature column so that all three
# class histograms share the same bins and are directly comparable.
ct, bins = np.histogram(X[:, 0], bins=20)
for class_code in (0, 1, 2):
    plt.hist(X[y == class_code, 0], bins=bins, alpha=0.3)
# Legend entries follow the plotting order above (class codes 0, 1, 2).
plt.legend(['Setosa', 'Versicolor', 'Virginica'])
Out[16]:
In [17]:
# Display names for the class codes 0, 1, 2.
titles = ['Setosa', 'Versicolor', 'Virginica']
plt.figure(figsize=[12,8])
# 2x2 grid: one panel per feature, each overlaying the three class histograms.
for col in range(4):
    feature = X[:, col]
    plt.subplot(2, 2, col + 1)
    plt.title(header[col], fontsize=15)
    # Bin edges come from the full column so the classes share the same bins.
    ct, bins = np.histogram(feature, bins=20)
    for class_code in range(3):
        plt.hist(feature[y == class_code], bins=bins, alpha=0.3)
    # Same y range in every panel so bar heights can be compared across panels.
    plt.ylim(0, 40)
    # A single legend, in the first panel only.
    if col == 0:
        plt.legend(titles)
In [18]:
# Number of samples drawn per class.
N=30
# One polyline per sample across the four features, coloured by class.
# Rows are selected by class code via y rather than by fixed row offsets
# (0 / 50 / 100), so the plot no longer depends on the file being sorted
# with exactly 50 rows per class.
for class_code, line_style in enumerate(['r-', 'b-', 'g-']):
    # Transposed so each sample becomes one line over the feature positions.
    plt.plot(X[y == class_code][:N].T, line_style, alpha=0.3)
plt.xticks(range(4), header)
Out[18]:
In [19]:
# Feature rows grouped by class: Xs[k] holds the rows whose class code is k.
Xs = [X[y == class_code] for class_code in (0, 1, 2)]
In [20]:
plt.figure(figsize=[8,12])
# Three stacked panels, one per class; each draws one line per feature over
# that class's samples.
for i, class_title in enumerate(titles):
    plt.subplot(3, 1, i + 1)
    plt.plot(X[y == i])
    plt.title(class_title, fontsize=15)
    # Shared y range so the panels are directly comparable.
    plt.ylim(0, 10)
    if i == 0:
        # Legend (feature names) on the top panel only.
        plt.legend(header)
    elif i == 2:
        # x-axis label on the bottom panel only.
        plt.xlabel('samples')
In [21]:
# Indices of the two feature columns to plot against each other.
col1, col2 = 0, 1
# Scatter of the chosen feature pair, with points coloured by class code.
plt.scatter(x=X[:, col1], y=X[:, col2], s=60, c=y)
plt.colorbar(shrink=0.5)
plt.xlabel(header[col1])
plt.ylabel(header[col2])
Out[21]:
'beginner > 파이썬 머신러닝 기초' 카테고리의 다른 글
| 글 제목 | 작성일 |
|---|---|
| 머신러닝 기초_비용함수 (0) | 2019.02.22 |
| 머신러닝 기초 _ 거리 (0) | 2019.02.22 |
| 머신러닝과 파이썬 (0) | 2019.02.21 |
| Scikit-learn 기초 (0) | 2019.02.20 |
| 머신러닝 기초 (0) | 2019.02.20 |