목차
# Configure matplotlib so that Korean (Hangul) text renders in plots.
# The font is chosen per operating system instead of assuming a Windows
# font path, so this cell no longer fails on macOS or Linux.
import platform

from matplotlib import font_manager, rc

system_name = platform.system()
if system_name == 'Windows':
    # Resolve the family name from the Malgun Gothic font file on Windows.
    font_name = font_manager.FontProperties(
        fname='c:/Windows/Fonts/malgun.ttf').get_name()
elif system_name == 'Darwin':
    # macOS: AppleGothic is the Korean-capable family used here.
    font_name = 'AppleGothic'
else:
    # NOTE(review): assumes the NanumGothic font is installed on this
    # platform; matplotlib falls back to its default font when the
    # requested family cannot be found - confirm for your environment.
    font_name = 'NanumGothic'
rc('font', family=font_name)
1. 성별, 나이별
import numpy as np
import pandas as pd
# Data source: Korea Centers for Disease Control and Prevention (KCDC).
# Read the daily confirmed-case CSV, using its 'Date' column as the row index.
confirmed = pd.read_csv(
    filepath_or_buffer='./data/daily_Confirmed.csv',
    index_col=['Date'],
)
# Alternative encodings that can be passed as read_csv(..., encoding=...):
#   'utf-8'  - the default
#   'cp949'  - 11,172 characters
#   'euc-kr' - 2,350 characters
confirmed.info()  # print a summary of `confirmed` (index, columns, dtypes)
<class 'pandas.core.frame.DataFrame'>
Index: 32 entries, 2020-03-01 to 2020-04-01
Data columns (total 11 columns):
Female 32 non-null int64
Male 32 non-null int64
0-9 32 non-null int64
10- 32 non-null int64
20-29 32 non-null int64
30-39 32 non-null int64
40-49 32 non-null int64
50-59 32 non-null int64
60-69 32 non-null int64
70-79 32 non-null int64
80- 32 non-null int64
dtypes: int64(11)
memory usage: 3.0+ KB
confirmed.head() # show the first rows; the default is 5 rows
Out[-]
Female Male 0-9 10- 20-29 30-39 40-49 50-59 60-69 70-79 80-
Date
2020-03-01 2197 1329 27 137 1054 426 521 687 453 158 63
2020-03-02 2621 1591 32 169 1235 506 633 834 530 192 81
2020-03-03 3002 1810 34 204 1417 578 713 952 597 224 93
2020-03-04 3332 1996 34 233 1575 631 790 1051 646 260 108
2020-03-05 3617 2149 38 257 1727 659 847 1127 699 288 124
confirmed.tail() # show the last rows of the table
Out[-]
Female Male 0-9 10- 20-29 30-39 40-49 50-59 60-69 70-79 80-
Date
2020-03-28 5742 3736 109 501 2567 978 1278 1780 1201 632 432
2020-03-29 5784 3799 111 508 2602 991 1292 1798 1210 635 434
2020-03-30 5827 3836 112 513 2630 1002 1297 1812 1218 640 437
2020-03-31 5881 3905 112 515 2656 1012 1312 1851 1235 651 442
2020-04-01 5941 3946 116 519 2682 1027 1323 1865 1245 658 452
confirmed.isnull().sum() # count the missing values in each column
Out[-]
Female 0
Male 0
0-9 0
10- 0
20-29 0
30-39 0
40-49 0
50-59 0
60-69 0
70-79 0
80- 0
dtype: int64
# In case the data does contain missing or non-numeric values:
# pd.to_numeric(df['column_name'], errors='coerce') -> unparseable values become NaN
# Summary statistics (count, mean, std, min, quartiles, max) for the 'Female' column.
confirmed['Female'].describe()
Out[-]
count 32.000000
mean 4882.531250
std 999.970515
min 2197.000000
25% 4547.250000
50% 5146.500000
75% 5601.000000
max 5986.000000
Name: Female, dtype: float64