https://s3-us-west-2.amazonaws.com/secure.notion-static.com/e6a690f9-bfda-4dcf-a2d0-d8b909bd7c86/---.jpg

목차

# Configure matplotlib so that Korean (Hangul) text renders in plots.
import os
import platform

from matplotlib import font_manager, rc

# Malgun Gothic ships with Windows. Resolve the family name from the font
# file when it exists; otherwise pick a Korean-capable family per platform
# instead of failing on a missing file.
MALGUN_FONT_PATH = 'c:/Windows/Fonts/malgun.ttf'
if os.path.isfile(MALGUN_FONT_PATH):
    font_name = font_manager.FontProperties(
        fname=MALGUN_FONT_PATH).get_name()
elif platform.system() == 'Darwin':
    font_name = 'AppleGothic'  # macOS
else:
    # NOTE(review): assumes the Nanum fonts are installed on this machine;
    # matplotlib warns and uses its default font if the family is not found.
    font_name = 'NanumGothic'
rc('font', family=font_name)

# Many Korean fonts lack the Unicode minus glyph (U+2212), which makes
# negative tick labels render as empty boxes; use the ASCII hyphen instead.
rc('axes', unicode_minus=False)

# import matplotlib.pyplot as plt
# plt.rc('font', family='AppleGothic') # mac
# plt.rc('font', family='Malgun Gothic') # window

1. 성별, 나이별

import numpy as np
import pandas as pd

# Data source: Korea Centers for Disease Control and Prevention (KCDC)
confirmed = pd.read_csv(
    './data/daily_Confirmed.csv',
    index_col=['Date'],
)
# Encoding options for Korean CSV files:
#   pd.read_csv('./data.csv', encoding='utf-8')   # the default
#   pd.read_csv('./data.csv', encoding='cp949')   # 11,172 characters
#   pd.read_csv('./data.csv', encoding='euc-kr')  # 2,350 characters
confirmed.info()  # print a summary of the confirmed DataFrame
<class 'pandas.core.frame.DataFrame'>
Index: 32 entries, 2020-03-01 to 2020-04-01
Data columns (total 11 columns):
Female    32 non-null int64
Male      32 non-null int64
0-9       32 non-null int64
10-       32 non-null int64
20-29     32 non-null int64
30-39     32 non-null int64
40-49     32 non-null int64
50-59     32 non-null int64
60-69     32 non-null int64
70-79     32 non-null int64
80-       32 non-null int64
dtypes: int64(11)
memory usage: 3.0+ KB
confirmed.head()  # the default number of rows is 5
Out[-]
						Female	Male	0-9	 10- 20-29	30-39	40-49	50-59	60-69	70-79	80-
			Date																				
2020-03-01	  2197	1329	27	137	  1054	 426	 521	 687	 453	 158	63
2020-03-02	  2621	1591	32	169	  1235	 506	 633	 834	 530	 192	81
2020-03-03	  3002	1810	34	204	  1417	 578	 713	 952	 597	 224	93
2020-03-04	  3332	1996	34	233	  1575	 631	 790	1051	 646	 260	108
2020-03-05	  3617	2149	38	257	  1727	 659	 847	1127	 699	 288	124
confirmed.tail()  # show the last rows (5 in the output below)
Out[-]
						Female	Male	0-9	 10- 20-29	30-39	40-49	50-59	60-69	70-79	80-
			Date											
2020-03-28	  5742	3736	109	 501	2567	 978	1278	1780	1201	 632	432
2020-03-29	  5784	3799	111	 508	2602	 991	1292	1798	1210	 635	434
2020-03-30	  5827	3836	112	 513	2630	1002	1297	1812	1218	 640	437
2020-03-31	  5881	3905	112	 515	2656	1012	1312	1851	1235	 651	442
2020-04-01	  5941	3946	116	 519	2682	1027	1323	1865	1245	 658	452
confirmed.isnull().sum()  # count the missing values in each column
Out[-]
Female    0
Male      0
0-9       0
10-       0
20-29     0
30-39     0
40-49     0
50-59     0
60-69     0
70-79     0
80-       0
dtype: int64
# If the data does turn out to contain missing or malformed values:
# pd.to_numeric(df['column_name'], errors='coerce')
#   -> values that cannot be parsed as numbers become NaN
confirmed['Female'].describe()
Out[-]
count      32.000000
mean     4882.531250
std       999.970515
min      2197.000000
25%      4547.250000
50%      5146.500000
75%      5601.000000
max      5986.000000
Name: Female, dtype: float64