제 2장 서울시 범죄 현황 분석¶

구성 및 블로깅 진행과정¶

2-1 데이터 획득하기
2-2 pandas를 이용하여 데이터 정리하기
2-3 지도 정보를 얻을 수 있는 Google Maps
2-4 Google Maps를 이용해서 주소와 위도, 경도 정보 얻기
-------------------------------------------------------
2-5 pandas의 pivot_table 학습하기
2-6 pivot_table을 이용해서 데이터 정리하기
2-7 데이터 표현을 위해 다듬기
-------------------------------------------------------
2-8 좀 더 편리한 시각화 도구 - Seaborn
2-9 범죄 데이터 시각화하기
-------------------------------------------------------
2-10 지도 시각화 도구 - folium
2-11 서울시 범죄율에 대한 지도 시각화
2-12 서울시 경찰서별 검거율과 구별 범죄 발생율을 동시에 시각화하기

출처: 파이썬으로 데이터 주무르기 by 민형기

2-8 좀 더 편리한 시각화 도구 - Seaborn¶

# 몇 개의 사인함수를 그려보자.
# seaborn을 import 할 때는 matplotlib도 같이 import 되어 있어야 한다.
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

x = np.linspace(0, 14, 100)
y1 = np.sin(x)
y2 = 2*np.sin(x+0.5)
y3 = 3*np.sin(x+1.0)
y4 = 4*np.sin(x+1.5)

plt.figure(figsize=(10,6))
plt.plot(x, y1, x, y2, x, y3, x, y4)
plt.show()

# seaborn ships a style called "whitegrid"; switch to it and redraw the same
# four curves so the difference in appearance is visible.
sns.set_style("whitegrid")

fig, ax = plt.subplots(figsize=(10, 6))
for curve in (y1, y2, y3, y4):
    # Each call takes the next colour from the cycle, as one combined call would.
    ax.plot(x, curve)
plt.show()

# Plotting and numeric libraries used by the cells below.
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Use the whitegrid style and render figures inline in the notebook.
sns.set_style('whitegrid')
%matplotlib inline

# "tips" is one of the datasets bundled with seaborn.
# It records, per restaurant bill: the day, lunch or dinner, whether the party
# smoked, the total bill and the tip.
tips = sns.load_dataset("tips")
tips.head()

# Box plot with the day of the week on the x axis and the total bill on the
# y axis.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=tips, x="day", y="total_bill", ax=ax)
plt.show()

# The `hue` option splits each day's box by a further column (smoker here);
# `palette` picks the colours used for the split.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="smoker",
    palette="Set3",
    ax=ax,
)
plt.show()

위 그래프를 보면 흡연자의 결제 금액 범위가 대체로 더 넓다는 것을 알 수 있다.

# Axis mapping shared by every plot in this cell: day on x, total bill on y.
day_vs_bill = dict(x='day', y='total_bill', data=tips)

# Swarm plot alone: one grey marker per bill.
plt.figure(figsize=(8,6))
sns.swarmplot(color='.5', **day_vs_bill)
plt.show()

# Box plot with the individual bills drawn on top in a darker grey, so the
# summary and the raw points share the same axes.
plt.figure(figsize=(8,6))
sns.boxplot(**day_vs_bill)
sns.swarmplot(color='.25', **day_vs_bill)
plt.show()

# Switch to the darkgrid style.
# lmplot draws the data as a scatter plot, adds a fitted regression line and
# shades the confidence interval (ci) around it.
sns.set_style("darkgrid")
bill_vs_tip = {"x": "total_bill", "y": "tip", "data": tips, "height": 7}
sns.lmplot(**bill_vs_tip)
plt.show()

# lmplot also accepts `hue`, and `palette` chooses the colours per group.
sns.lmplot(hue="smoker", palette="Set1", **bill_vs_tip)
plt.show()

# 10 x 12 array of random samples drawn uniformly from [0, 1).
# No seed is set in this cell, so the values differ on every run.
uniform_data = np.random.rand(10,12)
uniform_data

array([[0.83666205, 0.08559792, 0.09644545, 0.2315992 , 0.84025657,
        0.63066756, 0.57578346, 0.1066434 , 0.10725941, 0.71430008,
        0.91503025, 0.1516732 ],
       [0.86624697, 0.53609833, 0.39747714, 0.70029354, 0.59136013,
        0.24059958, 0.85756993, 0.73013651, 0.22695031, 0.0986498 ,
        0.01471954, 0.38675877],
       [0.3437829 , 0.64117413, 0.85208174, 0.91607283, 0.15021417,
        0.2811987 , 0.83194578, 0.60936644, 0.07138867, 0.05013622,
        0.09252043, 0.80472116],
       [0.25752768, 0.01568994, 0.67498027, 0.23182116, 0.72122089,
        0.9838959 , 0.1757501 , 0.40351434, 0.28680451, 0.77508725,
        0.58446557, 0.21785458],
       [0.04003208, 0.89435626, 0.71756379, 0.10922755, 0.53283964,
        0.70969311, 0.03276397, 0.87402092, 0.65652653, 0.01722884,
        0.30947688, 0.90948403],
       [0.75120878, 0.42403376, 0.85544157, 0.2767124 , 0.90505397,
        0.5915722 , 0.20775143, 0.43305821, 0.86759469, 0.47854828,
        0.84950051, 0.15990865],
       [0.55829583, 0.81438671, 0.52145671, 0.49861364, 0.21846332,
        0.44110309, 0.61586704, 0.70446109, 0.21739954, 0.27330412,
        0.10678586, 0.4512381 ],
       [0.29709353, 0.81702274, 0.43140682, 0.48376528, 0.06306388,
        0.27104655, 0.96776545, 0.44889048, 0.94314791, 0.10339698,
        0.7864767 , 0.28355936],
       [0.35816311, 0.55795299, 0.61838288, 0.927347  , 0.81840631,
        0.12359033, 0.30068444, 0.01037628, 0.94931991, 0.69274961,
        0.9435388 , 0.08243518],
       [0.53784204, 0.42611068, 0.01228216, 0.50322787, 0.47144684,
        0.56447041, 0.71229815, 0.84191176, 0.22348581, 0.75877489,
        0.51116341, 0.39772712]])

# Draw the matrix twice: first with the colour scale taken from the data,
# then with the scale pinned to the full [0, 1] range via vmin/vmax.
for scale_limits in ({}, {"vmin": 0, "vmax": 1}):
    sns.heatmap(uniform_data, **scale_limits)
    plt.show()

# Monthly airline passenger counts, one row per (year, month).
flights = sns.load_dataset('flights')
flights.head()

# Reshape to a month x year table of passenger counts.
# The arguments are passed by keyword: pandas 2.0 made DataFrame.pivot
# keyword-only, so the positional form pivot('month', 'year', 'passengers')
# raises TypeError there.
flights = flights.pivot(index='month', columns='year', values='passengers')
flights.head()

# Plain heat map of the pivoted table.
plt.figure(figsize=(10,8))
sns.heatmap(flights)
plt.show()

# A heat map is a good way to show the trend in this kind of data.
# annot=True writes each count into its cell; fmt='d' formats it as an integer.
plt.figure(figsize=(10,8))
sns.heatmap(flights, annot=True, fmt='d')
plt.show()

# Load the iris dataset and switch seaborn to the "ticks" style.
sns.set(style='ticks')
iris = sns.load_dataset('iris')
iris.head(10)

# Pairwise plots of every numeric column.
sns.pairplot(iris)
plt.show()

# Same grid, coloured by species.
sns.pairplot(iris, hue='species')
plt.show()

# Column groups reused by the two restricted grids below.
sepal_cols = ['sepal_width', 'sepal_length']
petal_cols = ['petal_width', 'petal_length']

# Only the sepal measurements, on both axes.
sns.pairplot(iris, vars=sepal_cols)
plt.show()

# Sepal measurements on the x axes against petal measurements on the y axes.
sns.pairplot(iris, x_vars=sepal_cols, y_vars=petal_cols)
plt.show()

# Anscombe's quartet: rows are tagged by a `dataset` column (I, II, III, ...)
# with x / y values for each.
anscombe = sns.load_dataset('anscombe')
anscombe.head(5)

sns.set_style('darkgrid')

# Dataset I: linear fit, confidence band switched off (ci=None).
sns.lmplot(x='x', y='y', data=anscombe.query('dataset=="I"'), ci=None, height=7)
plt.show()

# Same fit with larger scatter markers (s=80) so the points are easier to see.
sns.lmplot(x='x', y='y', data=anscombe.query('dataset=="I"'),
          ci=None, scatter_kws={'s':80}, height=7)
plt.show()

# Dataset II with a first-order (straight line) fit.
sns.lmplot(x='x',y='y', data=anscombe.query('dataset == "II"'),
           order=1, ci=None, scatter_kws={'s':80}, height=7)
plt.show()

# Dataset II again with order=2, i.e. a second-order polynomial fit.
sns.lmplot(x='x', y='y', data=anscombe.query('dataset == "II"'),
          order=2, ci=None, scatter_kws={"s":80}, height=7)
plt.show()

# Dataset III with an ordinary fit.
sns.lmplot(x='x',y='y', data=anscombe.query('dataset == "III"'),
           ci=None, scatter_kws={'s':80}, height=7)
plt.show()

# Dataset III with robust=True, which asks seaborn for a robust regression
# that down-weights outliers.
sns.lmplot(x='x', y='y', data=anscombe.query('dataset == "III"'),
          robust=True, ci=None, scatter_kws={"s":80}, height=7)
# Show explicitly like every other cell; otherwise the cell's last expression
# is echoed as a "<seaborn.axisgrid.FacetGrid at 0x...>" repr.
plt.show()

<seaborn.axisgrid.FacetGrid at 0x1d260ef9cc0>

2-9 범죄 데이터 시각화하기¶

# 방금 배운 seaborn을 활용하여 뭔가 성과를 얻어내보자.
# 일단 한글 폰트 문제부터!
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

import platform
path = 'c:/Windows/Fonts/malgun.ttf'
from matplotlib import font_manager, rc

if platform.system() == 'Darwin':
    rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
else:
    print('Unkonwn system... sorry~~~~')

# Look at the data once more before plotting.
crime_anal_norm.head()

# Check how robbery, murder and violence relate to one another, pair by pair,
# with a regression line on each panel (kind='reg').
crime_cols = ['강도', '살인', '폭력']
sns.pairplot(crime_anal_norm, kind='reg', height=3, vars=crime_cols)
plt.show()

# Population and CCTV count (x axes) against murder and robbery (y axes).
context_cols = ['인구수', 'CCTV']
outcome_cols = ['살인', '강도']
sns.pairplot(
    crime_anal_norm,
    kind='reg',
    height=3,
    x_vars=context_cols,
    y_vars=outcome_cols,
)
plt.show()

CCTV와 살인의 상관관계는 낮지만, 자세히 보면 CCTV 개수가 적은 구간에서 살인 발생이 높은 데이터가 있다. 즉, CCTV 축을 기준으로 그래프 왼쪽에 살인과 강도 수치가 높은 데이터가 보인다.

# Population and CCTV count (x axes) against the murder and violence arrest
# rates (y axes), each panel with a regression line.
context_cols = ['인구수', 'CCTV']
arrest_rate_cols = ['살인검거율', '폭력검거율']
sns.pairplot(crime_anal_norm, kind='reg', height=3,
             x_vars=context_cols, y_vars=arrest_rate_cols)
plt.show()

살인 및 폭력 검거율과 CCTV의 관계가 양의 상관관계가 아니다. 오히려 음의 상관계수도 보인다. 또 인구수와 폭력 검거율도 음의 상관관계가 관찰된다.

# '검거' holds the sum of the per-crime arrest rates. Rescale it so that its
# largest value is 100, then sort the districts by it, highest first.
tmp_max = crime_anal_norm['검거'].max()
crime_anal_norm['검거'] = crime_anal_norm['검거'] / tmp_max * 100
crime_anal_norm_sort = crime_anal_norm.sort_values(by='검거', ascending=False)
# The original cell misspelled this name as `crime_anal_nrom_sort`; keep the
# old spelling as an alias in case another cell still refers to it.
crime_anal_nrom_sort = crime_anal_norm_sort
crime_anal_norm_sort.head()

# Heat map of the per-crime arrest rates.
target_col = ['강간검거율', '강도검거율', '살인검거율', '절도검거율', '폭력검거율']

# Districts ordered by the rescaled arrest total, best first.
crime_anal_norm_sort = crime_anal_norm.sort_values(by='검거', ascending=False)
arrest_rates = crime_anal_norm_sort[target_col]

# Annotate every cell with its value and separate cells with thin lines.
heatmap_options = dict(annot=True, fmt='f', linewidths=.5, cmap='RdPu')

plt.figure(figsize = (10,10))
sns.heatmap(arrest_rates, **heatmap_options)
plt.title('범죄 검거 비율 (정규화된 검거의 합으로 정렬)')
plt.show()

결과를 보면 절도 검거율이 다른 검거율에 비해 낮다는 것을 알 수 있다.
그리고 그래프 하단으로 갈수록 검거율이 낮은데 그 속에 강남 3구 중에서 '서초구'가 보인다.
전반적으로 검거율이 우수한 구는 '도봉구','광진구','성동구'로 보인다.

# Heat map of the normalised crime counts.
target_col=['강간','강도','살인','절도','폭력','범죄']

# Divide the '범죄' total by 5 — presumably to turn the sum over the five crime
# columns into a mean (TODO confirm how '범죄' was built).
# NOTE: this overwrites the column in place, so re-running the cell divides
# by 5 again.
crime_anal_norm['범죄'] = crime_anal_norm['범죄'] / 5
crime_anal_norm_sort = crime_anal_norm.sort_values(by='범죄', ascending=False)
plt.figure(figsize=(10,10))

# Use seaborn's documented `linewidths` parameter, as the arrest-rate heat map
# does. The singular `linewidth` is not a heatmap parameter; it is forwarded
# through **kwargs and can collide with heatmap's own `linewidths` default.
sns.heatmap(crime_anal_norm_sort[target_col], annot=True, fmt='f', linewidths=.5)
plt.title('범죄비율 (정규화된 발생 건수로 정렬)')
plt.show()

발생 건수로 보니 '강남구', '양천구', '영등포구'가 범죄 발생 건수가 높다. 그리고 '송파구'와 '서초구'도 낮다고 볼 수 없다. 그렇다면 정말 강남 3구가 안전하다고 할 수 있을지 의문이 생긴다.

# Save the result for later use. The frame written here carries the in-place
# changes made above (rescaled '검거', '범죄' divided by 5).
output_csv = 'pydata/02. crime_in_Seoul_final.csv'
crime_anal_norm.to_csv(output_csv, sep=',', encoding='utf-8')

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4

	year	month	passengers
0	1949	January	112
1	1949	February	118
2	1949	March	132
3	1949	April	129
4	1949	May	121

year	1949	1950	1951	1952	1953	1954	1955	1956	1957	1958	1959	1960
month
January	112	115	145	171	196	204	242	284	315	340	360	417
February	118	126	150	180	196	188	233	277	301	318	342	391
March	132	141	178	193	236	235	267	317	356	362	406	419
April	129	135	163	181	235	227	269	313	348	348	396	461
May	121	125	172	183	229	234	270	318	355	363	420	472

	sepal_length	sepal_width	petal_length	petal_width	species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa
5	5.4	3.9	1.7	0.4	setosa
6	4.6	3.4	1.4	0.3	setosa
7	5.0	3.4	1.5	0.2	setosa
8	4.4	2.9	1.4	0.2	setosa
9	4.9	3.1	1.5	0.1	setosa

	dataset	x	y
0	I	10.0	8.04
1	I	8.0	6.95
2	I	13.0	7.58
3	I	9.0	8.81
4	I	11.0	8.33

조환희의 학습 블로그

티스토리 뷰

서울시 범죄 현황 분석 -3

제 2장 서울시 범죄 현황 분석¶

구성 및 블로깅 진행과정¶

2-8 좀 더 편리한 시각화 도구 - Seaborn¶

2-9 범죄 데이터 시각화하기¶

'beginner > 파이썬 분석' 카테고리의 다른 글

티스토리툴바

	강간	강도	살인	절도	폭력	강간검거율	강도검거율	살인검거율	절도검거율	폭력검거율	인구수	CCTV	범죄	검거
구별
강남구	1.000000	0.941176	0.916667	0.953472	0.661386	77.728285	85.714286	76.923077	42.857143	86.484594	570500.0	2780	4.472701	369.707384
강동구	0.155620	0.058824	0.166667	0.445775	0.289667	78.846154	100.000000	75.000000	33.347422	82.890855	453233.0	773	1.116551	370.084431
강북구	0.146974	0.529412	0.416667	0.126924	0.274769	82.352941	92.857143	100.000000	43.096234	88.637222	330192.0	748	1.494746	406.943540
관악구	0.628242	0.411765	0.583333	0.562094	0.428234	69.062500	100.000000	88.888889	30.561715	80.109157	525515.0	1496	2.613667	368.622261
광진구	0.397695	0.529412	0.166667	0.671570	0.269094	91.666667	100.000000	100.000000	42.200925	83.047619	372164.0	707	2.034438	416.915211

	강간	강도	살인	절도	폭력	강간검거율	강도검거율	살인검거율	절도검거율	폭력검거율	인구수	CCTV	범죄	검거
구별
도봉구	0.000000	0.235294	0.083333	0.000000	0.000000	100.000000	100.0	100.0	44.967074	87.626093	348646.0	485	0.318627	100.000000
금천구	0.141210	0.058824	0.083333	0.172426	0.134074	80.794702	100.0	100.0	56.668794	86.465433	255082.0	1015	0.589867	97.997139
광진구	0.397695	0.529412	0.166667	0.671570	0.269094	91.666667	100.0	100.0	42.200925	83.047619	372164.0	707	2.034438	96.375820
동대문구	0.204611	0.470588	0.250000	0.314061	0.250887	84.393064	100.0	100.0	41.090358	87.401884	369496.0	1294	1.490147	95.444250
용산구	0.265130	0.529412	0.250000	0.169004	0.133128	89.175258	100.0	100.0	37.700706	83.121951	244203.0	1624	1.346674	94.776790

시카고 샌드위치 맛집 분석 -1 (0)	2019.07.05
서울시 범죄 현황 분석 -4 (2)	2019.07.03
서울시 범죄 현황 분석 -2 (0)	2019.07.03
서울시 범죄 현황 분석 -1 (0)	2019.07.03
서울시 구별 CCTV 현황 분석 -3 (0)	2019.06.29

« 2025/04 »
일	월	화	수	목	금	토
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30