티스토리 뷰
In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [23]:
# Disabled cell: the code below is wrapped in a triple-quoted string, so none
# of it runs; the cell merely evaluates to that string.
# The wrapped code reads the CSV by hand: the first line is split on commas
# into `features`, every remaining line is split on commas and appended to
# `data`, then the file is closed.
'''f = open('성별학생수현황.csv')
line = f.readline()
features = line.strip().split(',')
data=[]
for line in f:
l = line.strip().split(',')
data.append(l)
data
f.close()
features'''
Out[23]:
In [24]:
# Disabled cell (string literal, nothing executes): the wrapped loop would
# print each entry of `features` as a zero-padded two-digit index followed by
# the entry itself.
'''for i,c in enumerate(features):
print('%02d=>%s' % (i,c))'''
Out[24]:
In [37]:
# Load the whole CSV into a DataFrame using pandas' pure-Python parser
# engine, then preview the first rows.
df_All=pd.read_csv("성별학생수현황.csv", engine='python')
df_All.head()
Out[37]:
In [38]:
# Keep only the rows whose '기준년도' column equals 2018, then preview them.
df_2018 = df_All.loc[df_All['기준년도'] == 2018]
df_2018.head(5)
Out[38]:
In [41]:
# Summary statistics for the 2018 subset.
df_2018.describe()
Out[41]:
In [42]:
# Restrict the 2018 rows to those whose '학교급명' column is '초등학교'
# (the elementary-school subset), then preview them.
df_2018_element = df_2018.loc[df_2018['학교급명'] == '초등학교']
df_2018_element.head(5)
Out[42]:
In [74]:
# Per-grade head counts for 2018, by sex, across the three school levels.
#
# The earlier version of these cells ended most assignments with a trailing
# comma, which silently turned each total into a 1-tuple; the last total of
# every cell was then wrapped as `(x,)` by hand so that np.array() would still
# accept the mix, yielding accidental (12, 1) arrays.  Here every total is a
# plain scalar and boy_2018 / girl_2018 are flat length-12 arrays ordered
# grade 1 .. 12.


def _grade_totals(frame, n_grades, sex):
    """Sum the '<grade>학년<sex>학생수(명)' columns of `frame`.

    frame    -- DataFrame already filtered to a single school level
    n_grades -- number of grades at that level (columns 1 .. n_grades are read)
    sex      -- '남자' or '여자', exactly as it appears in the column names
    Returns a list with one column sum per grade, in grade order.
    """
    return [np.sum(frame['%d학년%s학생수(명)' % (grade, sex)])
            for grade in range(1, n_grades + 1)]


# Middle- and high-school subsets of the 2018 rows.
df_2018_mid = df_2018[df_2018['학교급명']=='중학교']
df_2018_high = df_2018[df_2018['학교급명']=='고등학교']

# (frame, number of grades), listed in order so the concatenated totals run
# elementary 1-6, then middle 1-3 (named 7-9), then high 1-3 (named 10-12).
_levels_2018 = [(df_2018_element, 6), (df_2018_mid, 3), (df_2018_high, 3)]

boy_2018 = np.array([total
                     for frame, n_grades in _levels_2018
                     for total in _grade_totals(frame, n_grades, '남자')])
girl_2018 = np.array([total
                      for frame, n_grades in _levels_2018
                      for total in _grade_totals(frame, n_grades, '여자')])

# Keep the individual per-grade names the original cells defined, now as
# scalars rather than 1-tuples.
(boy_1_2018, boy_2_2018, boy_3_2018, boy_4_2018, boy_5_2018, boy_6_2018,
 boy_7_2018, boy_8_2018, boy_9_2018,
 boy_10_2018, boy_11_2018, boy_12_2018) = boy_2018
(girl_1_2018, girl_2_2018, girl_3_2018, girl_4_2018, girl_5_2018, girl_6_2018,
 girl_7_2018, girl_8_2018, girl_9_2018,
 girl_10_2018, girl_11_2018, girl_12_2018) = girl_2018

boy_2018, girl_2018
Out[94]:
In [104]:
# Line chart of the 2018 per-grade head counts: one line for boys, one for
# girls.  `plt` and `np` come from the import cell at the top of the notebook,
# so pyplot is not re-imported here.
grades = np.arange(1, len(boy_2018) + 1)  # x axis: 1 .. N, one tick per entry

fig, ax = plt.subplots()
ax.plot(grades, boy_2018, label='boy')
ax.plot(grades, girl_2018, label='girl')
ax.legend()
ax.set_xlabel('grade')
ax.set_ylabel('number')
ax.set_title('2018 Boys and Girls \n for each grade', fontsize=20)
plt.show()  # draw the figure without echoing the title's Text repr as output
Out[104]:
In [98]:
# Element-wise sum of the boys' and girls' per-grade arrays.
total_2018 = np.add(boy_2018, girl_2018)
total_2018
Out[98]:
In [106]:
# Line chart of the combined (boys + girls) 2018 head count per grade.
# `plt` and `np` come from the import cell at the top of the notebook, so
# pyplot is not re-imported here.
grades = np.arange(1, len(total_2018) + 1)  # x axis: 1 .. N, one tick per entry

fig, ax = plt.subplots()
ax.plot(grades, total_2018, label='total')
ax.legend()
ax.set_xlabel('grade')
ax.set_ylabel('number')
ax.set_title('2018 Total student \n for each grade', fontsize=20)
plt.show()  # draw the figure without echoing the title's Text repr as output
Out[106]:
'beginner > 파이썬 분석' 카테고리의 다른 글
캐글 타이타닉 데이터 분석 - 1 (0) | 2019.04.11 |
---|---|
Pandas를 이용한 지하철 데이터 분석 (0) | 2019.03.12 |
로지스틱 회귀분석 실습 (0) | 2019.02.11 |
인공신경망 실습 (2) | 2019.02.10 |
Iris 데이터를 이용해 간단한 랜덤 포레스트 구현 (1) | 2019.02.06 |