import glob, os
from datetime import date
# Plotly offline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import warnings
warnings.filterwarnings('ignore')
# Import lecture/stockage données
import numpy as np
import pandas as pd
# Visualisation
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import calmap
import folium
from IPython.core.display import display, HTML
# Shared colour palette: hex strings reused by the charts in this script.
# The names are role labels; they do not necessarily match the exact hue.
colorGrey, colorRed = '#393e46', '#ff2e63'
colorCyan, colorYellow = '#21bf73', '#fe9801'
# Load the CSSE COVID-19 CSV inputs, addressed relative to the working directory.
csseDataDir = "../COVID-19/csse_covid_19_data"
csseSeriesDir = csseDataDir + "/csse_covid_19_time_series"

# Daily report: pinned to the single 03-23-2020 snapshot. (An earlier variant
# that concatenated every daily report is no longer used.)
dfGlobalDailyLatest = pd.read_csv(csseDataDir + "/csse_covid_19_daily_reports/03-23-2020.csv")

# Time series, one CSV per metric: confirmed, deaths, recovered.
dfGlobalSeriesConfirmed = pd.read_csv(csseSeriesDir + "/time_series_19-covid-Confirmed.csv")
dfGlobalSeriesDeaths = pd.read_csv(csseSeriesDir + "/time_series_19-covid-Deaths.csv")
dfGlobalSeriesRecovered = pd.read_csv(csseSeriesDir + "/time_series_19-covid-Recovered.csv")
# Active cases = confirmed - deaths - recovered, computed per row of the daily report.
dfGlobalDailyLatest['Active'] = (
    dfGlobalDailyLatest['Confirmed']
    - dfGlobalDailyLatest['Deaths']
    - dfGlobalDailyLatest['Recovered']
)
# Per-country totals. The column selection has to be a list (double brackets):
# indexing a GroupBy with a bare tuple of column names is rejected by current pandas.
globalAll = dfGlobalDailyLatest.groupby(['Country_Region'])[
    ['Confirmed', 'Deaths', 'Recovered', 'Active']
].sum()
# Ranking by confirmed cases, with the country restored as a regular column.
classement = globalAll.sort_values(by='Confirmed', ascending=False).reset_index(drop=False)
# Builds a red-gradient Styler; the result is neither assigned nor explicitly
# displayed here.
classement.style.background_gradient(cmap='Reds')
# Embed the Flourish bar-chart-race visualisation via its HTML snippet.
flourishEmbedHtml = '<div class="flourish-embed flourish-bar-chart-race" data-src="visualisation/1619129"><script src="https://public.flourish.studio/resources/embed.js"></script></div>'
display(HTML(flourishEmbedHtml))
# Horizontal bar chart of the ten countries with the most deaths.
deathsTop10 = classement.sort_values('Deaths', ascending=False).head(10)
# Re-sorted ascending before plotting (presumably so the largest bar ends up on top).
deathsTop10 = deathsTop10.sort_values('Deaths', ascending=True)
# The x axis runs from 0 to the overall maximum plus a fixed margin of 500.
deathsAxisMax = max(classement['Deaths']) + 500
fig = px.bar(
    deathsTop10,
    x="Deaths",
    y="Country_Region",
    text='Deaths',
    orientation='h',
    width=800,
    height=500,
    range_x=[0, deathsAxisMax],
)
fig.update_traces(marker_color=colorRed, opacity=0.6, textposition='outside')
iplot(fig)
# Horizontal bar chart of the ten countries with the most active cases.
activeTop10 = classement.sort_values('Active', ascending=False).head(10)
# Re-sorted ascending before plotting (presumably so the largest bar ends up on top).
activeTop10 = activeTop10.sort_values('Active', ascending=True)
# The x axis runs from 0 to the overall maximum plus a fixed margin of 5000.
activeAxisMax = max(classement['Active']) + 5000
top10Active = px.bar(
    activeTop10,
    x="Active",
    y="Country_Region",
    text='Active',
    orientation='h',
    width=800,
    height=500,
    range_x=[0, activeAxisMax],
)
top10Active.update_traces(marker_color=colorGrey, opacity=0.6, textposition='outside')
iplot(top10Active)
# Horizontal bar chart of the ten countries with the most confirmed cases.
confirmedTop10 = classement.sort_values('Confirmed', ascending=False).head(10)
# Re-sorted ascending before plotting (presumably so the largest bar ends up on top).
confirmedTop10 = confirmedTop10.sort_values('Confirmed', ascending=True)
# The x axis runs from 0 to the overall maximum plus a fixed margin of 8000.
confirmedAxisMax = max(classement['Confirmed']) + 8000
top10Confirmed = px.bar(
    confirmedTop10,
    x="Confirmed",
    y="Country_Region",
    text='Confirmed',
    orientation='h',
    width=800,
    height=500,
    range_x=[0, confirmedAxisMax],
)
top10Confirmed.update_traces(marker_color=colorYellow, opacity=0.6, textposition='outside')
iplot(top10Confirmed)
# Horizontal bar chart of the ten countries with the most recovered cases.
recoveredTop10 = classement.sort_values('Recovered', ascending=False).head(10)
# Re-sorted ascending before plotting (presumably so the largest bar ends up on top).
recoveredTop10 = recoveredTop10.sort_values('Recovered', ascending=True)
# The x axis runs from 0 to the overall maximum plus a fixed margin of 6000.
recoveredAxisMax = max(classement['Recovered']) + 6000
top10Recovered = px.bar(
    recoveredTop10,
    x="Recovered",
    y="Country_Region",
    text='Recovered',
    orientation='h',
    width=800,
    height=500,
    range_x=[0, recoveredAxisMax],
)
top10Recovered.update_traces(marker_color=colorCyan, opacity=0.6, textposition='outside')
iplot(top10Recovered)
# World-wide confirmed cases per date: a two-column table ('Date', 'Confirmed').
# The label and coordinate columns are dropped explicitly so that only the date
# columns are summed; this no longer depends on groupby().sum() discarding the
# string 'Province/State' column. Summing all rows at once gives the same totals
# as summing per country first.
tmpConfirmed = (
    dfGlobalSeriesConfirmed
    .drop(columns=['Province/State', 'Country/Region', 'Lat', 'Long'])
    .sum()
    .reset_index()
    .rename(columns={'index': 'Date', 0: 'Confirmed'})
)
# World-wide deaths per date: a two-column table ('Date', 'Deaths').
# The label and coordinate columns are dropped explicitly so that only the date
# columns are summed; this no longer depends on groupby().sum() discarding the
# string 'Province/State' column. Summing all rows at once gives the same totals
# as summing per country first.
tmpDeaths = (
    dfGlobalSeriesDeaths
    .drop(columns=['Province/State', 'Country/Region', 'Lat', 'Long'])
    .sum()
    .reset_index()
    .rename(columns={'index': 'Date', 0: 'Deaths'})
)
# World-wide recovered cases per date: a two-column table ('Date', 'Recovered').
# The label and coordinate columns are dropped explicitly so that only the date
# columns are summed; this no longer depends on groupby().sum() discarding the
# string 'Province/State' column. Summing all rows at once gives the same totals
# as summing per country first.
tmpRecovered = (
    dfGlobalSeriesRecovered
    .drop(columns=['Province/State', 'Country/Region', 'Lat', 'Long'])
    .sum()
    .reset_index()
    .rename(columns={'index': 'Date', 0: 'Recovered'})
)
# Join the three world-total tables on their shared 'Date' column.
tmpAll = tmpConfirmed.merge(tmpDeaths, on='Date').merge(tmpRecovered, on='Date')
# Still-active cases: confirmed minus deaths minus recovered.
tmpAll['Active'] = tmpAll['Confirmed'] - tmpAll['Deaths'] - tmpAll['Recovered']

# Long format: one row per (Date, Type) pair, as expected by the grouped bar chart.
caseTypes = ['Confirmed', 'Deaths', 'Recovered', 'Active']
figAll = tmpAll.melt(
    id_vars="Date", value_vars=caseTypes, value_name='Nombre de cas', var_name='Type'
)

# Grouped bars, one colour per case type; this is the world-wide chart ('Mondial').
caseTypeColours = [colorYellow, colorRed, colorCyan, colorGrey]
figFrSeriesAll = px.bar(
    figAll,
    x="Date",
    y="Nombre de cas",
    color='Type',
    title='Mondial',
    barmode='group',
    color_discrete_sequence=caseTypeColours,
)
iplot(figFrSeriesAll)
# Work on a copy: a plain assignment would only alias `classement`, and adding
# the rate column below would then silently modify the ranking table as well.
top10MortalityRate = classement.copy()
# Mortality rate = (deaths / confirmed) * 100, rounded to two decimals.
top10MortalityRate['Mortality Rate'] = round(
    (top10MortalityRate['Deaths'] / top10MortalityRate['Confirmed']) * 100, 2
)
# Keep only the countries with more than 100 confirmed cases, highest rate first.
temp = top10MortalityRate[top10MortalityRate['Confirmed'] > 100]
temp = temp.sort_values('Mortality Rate', ascending=False)
# `temp` is already sorted, so the 15 highest rates are simply its first rows;
# they are re-sorted ascending for plotting.
fig = px.bar(
    temp.head(15).sort_values('Mortality Rate', ascending=True),
    x="Mortality Rate",
    y="Country_Region",
    text='Mortality Rate',
    orientation='h',
    width=800,
    height=500,
    range_x=[0, 12],
)
fig.update_traces(marker_color=colorRed, opacity=0.6, textposition='outside')
iplot(fig)
# France (rows whose Province/State is also 'France'): confirmed and death series.
fraConfirmedMask = (
    (dfGlobalSeriesConfirmed['Country/Region'] == 'France')
    & (dfGlobalSeriesConfirmed['Province/State'] == 'France')
)
fraSeriesConfirmed = dfGlobalSeriesConfirmed[fraConfirmedMask].drop(columns=['Lat', 'Long'])
fraDeathsMask = (
    (dfGlobalSeriesDeaths['Country/Region'] == 'France')
    & (dfGlobalSeriesDeaths['Province/State'] == 'France')
)
fraSeriesDeaths = dfGlobalSeriesDeaths[fraDeathsMask].drop(columns=['Lat', 'Long'])

# Stack both series, renumber the rows 0 and 1, then flip so that each date
# becomes a row with 'Confirmed' and 'Deaths' columns.
fraSeriesAll = (
    pd.concat([fraSeriesConfirmed, fraSeriesDeaths])
    .drop(columns=['Province/State', 'Country/Region'])
    .reset_index(drop=True)
    .transpose()
    .rename(columns={0: 'Confirmed', 1: 'Deaths'})
    .reset_index()
    .rename(columns={'index': 'Date'})
)
# Deaths / confirmed ratio, rounded to 3 decimals and expressed in percent.
fraSeriesAll['France'] = (fraSeriesAll['Deaths'] / fraSeriesAll['Confirmed']).round(3) * 100
# Italy: confirmed and death series.
itaConfirmedMask = dfGlobalSeriesConfirmed['Country/Region'] == 'Italy'
itaSeriesConfirmed = dfGlobalSeriesConfirmed[itaConfirmedMask].drop(columns=['Lat', 'Long'])
itaDeathsMask = dfGlobalSeriesDeaths['Country/Region'] == 'Italy'
itaSeriesDeaths = dfGlobalSeriesDeaths[itaDeathsMask].drop(columns=['Lat', 'Long'])

# Stack both series, renumber the rows 0 and 1, then flip so that each date
# becomes a row with 'Confirmed' and 'Deaths' columns.
itaSeriesAll = (
    pd.concat([itaSeriesConfirmed, itaSeriesDeaths])
    .drop(columns=['Province/State', 'Country/Region'])
    .reset_index(drop=True)
    .transpose()
    .rename(columns={0: 'Confirmed', 1: 'Deaths'})
    .reset_index()
    .rename(columns={'index': 'Date'})
)
# Deaths / confirmed ratio, rounded to 3 decimals and expressed in percent.
itaSeriesAll['Italie'] = (itaSeriesAll['Deaths'] / itaSeriesAll['Confirmed']).round(3) * 100
# China: all matching rows are summed into one national series per metric.
chiNonDateColumns = ['Lat', 'Long', 'Country/Region', 'Province/State']
chiConfirmedRows = dfGlobalSeriesConfirmed[dfGlobalSeriesConfirmed['Country/Region'] == 'China']
chiDeathsRows = dfGlobalSeriesDeaths[dfGlobalSeriesDeaths['Country/Region'] == 'China']

# Column-wise totals turned into two-column tables keyed by 'Date'.
chiSeriesConfirmed = (
    chiConfirmedRows.drop(columns=chiNonDateColumns)
    .sum()
    .reset_index()
    .rename(columns={'index': 'Date', 0: 'Confirmed'})
)
chiSeriesDeaths = (
    chiDeathsRows.drop(columns=chiNonDateColumns)
    .sum()
    .reset_index()
    .rename(columns={'index': 'Date', 0: 'Deaths'})
)

chiSeriesAll = chiSeriesConfirmed.merge(chiSeriesDeaths, on='Date')
# Deaths / confirmed ratio, rounded to 3 decimals and expressed in percent.
chiSeriesAll['Chine'] = (chiSeriesAll['Deaths'] / chiSeriesAll['Confirmed']).round(3) * 100
# Side-by-side lethality rates: one column per country, joined on 'Date'.
chiRate = chiSeriesAll[['Date', 'Chine']]
itaRate = itaSeriesAll[['Date', 'Italie']]
fraRate = fraSeriesAll[['Date', 'France']]
tempAll = chiRate.merge(itaRate, on='Date').merge(fraRate, on='Date')
# Long format: one row per (Date, Pays) pair with the rate in 'Taux'.
tempAll = tempAll.melt(
    id_vars='Date',
    value_vars=['Chine', 'France', 'Italie'],
    var_name='Pays',
    value_name='Taux',
)
# Line chart on a logarithmic y axis, one colour per country.
fig = px.line(
    tempAll,
    x="Date",
    y="Taux",
    color='Pays',
    log_y=True,
    title='Taux de létalité',
    color_discrete_sequence=['#d21629', '#162cd2', '#008000'],
)
iplot(fig)
# France (rows whose Province/State is also 'France'): confirmed and recovered series.
fraConfirmedMask = (
    (dfGlobalSeriesConfirmed['Country/Region'] == 'France')
    & (dfGlobalSeriesConfirmed['Province/State'] == 'France')
)
fraSeriesConfirmed = dfGlobalSeriesConfirmed[fraConfirmedMask].drop(columns=['Lat', 'Long'])
fraRecoveredMask = (
    (dfGlobalSeriesRecovered['Country/Region'] == 'France')
    & (dfGlobalSeriesRecovered['Province/State'] == 'France')
)
fraSeriesRecovered = dfGlobalSeriesRecovered[fraRecoveredMask].drop(columns=['Lat', 'Long'])

# Stack both series, renumber the rows 0 and 1, then flip so that each date
# becomes a row with 'Confirmed' and 'Recovered' columns.
fraSeriesAll = (
    pd.concat([fraSeriesConfirmed, fraSeriesRecovered])
    .drop(columns=['Province/State', 'Country/Region'])
    .reset_index(drop=True)
    .transpose()
    .rename(columns={0: 'Confirmed', 1: 'Recovered'})
    .reset_index()
    .rename(columns={'index': 'Date'})
)
# Alias of the same frame (not a copy); the name is rebound further down the script.
cmp10Fra = fraSeriesAll
# Recovered / confirmed ratio, rounded to 3 decimals and expressed in percent.
fraSeriesAll['France'] = (fraSeriesAll['Recovered'] / fraSeriesAll['Confirmed']).round(3) * 100
# Italy: confirmed and recovered series.
itaSeriesConfirmed = dfGlobalSeriesConfirmed[(dfGlobalSeriesConfirmed['Country/Region'] == 'Italy')]
itaSeriesConfirmed = itaSeriesConfirmed.drop(columns=['Lat', 'Long'])
itaSeriesRecovered = dfGlobalSeriesRecovered[(dfGlobalSeriesRecovered['Country/Region'] == 'Italy')]
itaSeriesRecovered = itaSeriesRecovered.drop(columns=['Lat', 'Long'])
# Fix: the second row must be the recovered series. The deaths series was
# concatenated here before, so the 'Recovered' column (and the recovery rate
# derived from it) actually contained deaths.
itaSeriesAll = pd.concat([itaSeriesConfirmed, itaSeriesRecovered])
itaSeriesAll = itaSeriesAll.drop(columns=['Province/State', 'Country/Region'])
# Renumber the two rows 0 and 1 so they can be renamed after the transpose.
itaSeriesAll = itaSeriesAll.reset_index(drop=True)
itaSeriesAll = itaSeriesAll.transpose()
itaSeriesAll = itaSeriesAll.rename(columns={0: 'Confirmed', 1: 'Recovered'})
itaSeriesAll = itaSeriesAll.reset_index()
itaSeriesAll = itaSeriesAll.rename(columns={'index': 'Date'})
# Alias of the same frame (not a copy); the name is rebound further down the script.
cmp10Ita = itaSeriesAll
# Recovered / confirmed ratio, rounded to 3 decimals and expressed in percent.
itaSeriesAll['Italie'] = round((itaSeriesAll['Recovered'] / itaSeriesAll['Confirmed']), 3) * 100
# China: all matching rows are summed into one national series per metric.
chiNonDateColumns = ['Lat', 'Long', 'Country/Region', 'Province/State']
chiConfirmedRows = dfGlobalSeriesConfirmed[dfGlobalSeriesConfirmed['Country/Region'] == 'China']
chiRecoveredRows = dfGlobalSeriesRecovered[dfGlobalSeriesRecovered['Country/Region'] == 'China']

# Column-wise totals turned into two-column tables keyed by 'Date'.
chiSeriesConfirmed = (
    chiConfirmedRows.drop(columns=chiNonDateColumns)
    .sum()
    .reset_index()
    .rename(columns={'index': 'Date', 0: 'Confirmed'})
)
chiSeriesRecovered = (
    chiRecoveredRows.drop(columns=chiNonDateColumns)
    .sum()
    .reset_index()
    .rename(columns={'index': 'Date', 0: 'Recovered'})
)

chiSeriesAll = chiSeriesConfirmed.merge(chiSeriesRecovered, on='Date')
# Alias of the same frame (not a copy); the name is rebound further down the script.
cmp10Chi = chiSeriesAll
# Recovered / confirmed ratio, rounded to 3 decimals and expressed in percent.
chiSeriesAll['Chine'] = (chiSeriesAll['Recovered'] / chiSeriesAll['Confirmed']).round(3) * 100
# Side-by-side recovery rates: one column per country, joined on 'Date'.
chiRate = chiSeriesAll[['Date', 'Chine']]
itaRate = itaSeriesAll[['Date', 'Italie']]
fraRate = fraSeriesAll[['Date', 'France']]
tempAll = chiRate.merge(itaRate, on='Date').merge(fraRate, on='Date')
# Long format: one row per (Date, Pays) pair with the rate in 'Taux'.
tempAll = tempAll.melt(
    id_vars='Date',
    value_vars=['Chine', 'France', 'Italie'],
    var_name='Pays',
    value_name='Taux',
)
# Line chart on a logarithmic y axis, one colour per country.
fig = px.line(
    tempAll,
    x="Date",
    y="Taux",
    color='Pays',
    log_y=True,
    title='Taux de guérison',
    color_discrete_sequence=['#d21629', '#162cd2', '#008000'],
)
iplot(fig)
# On compare les données à partir de 10 cas positifs.
# Italy's confirmed counts as a single 'Italie' column, one row per date.
# The column is named directly rather than by renaming a hard-coded CSV row
# label (previously 16), which silently stopped matching whenever the row order
# of the source file changed. Summing over the selected rows mirrors how the
# China series is built and is a no-op when a single row is selected.
cmp10Ita = (
    itaSeriesConfirmed
    .drop(columns=['Province/State', 'Country/Region'])
    .sum()
    .to_frame(name='Italie')
)
# Keep the days with at least 10 cases and renumber them 0..n-1 so that the
# index counts days.
cmp10Ita = cmp10Ita[cmp10Ita['Italie'] > 9].reset_index(drop=True)
# France's confirmed counts as a single 'France' column, one row per date.
# The column is named directly rather than by renaming a hard-coded CSV row
# label (previously 157), which silently stopped matching whenever the row order
# of the source file changed. Summing over the selected rows mirrors how the
# China series is built and is a no-op when a single row is selected.
cmp10Fra = (
    fraSeriesConfirmed
    .drop(columns=['Province/State', 'Country/Region'])
    .sum()
    .to_frame(name='France')
)
# Keep the days with at least 10 cases and renumber them 0..n-1 so that the
# index counts days.
cmp10Fra = cmp10Fra[cmp10Fra['France'] > 9].reset_index(drop=True)
# China's national confirmed series, with its count column relabelled 'Chine'.
cmp10Chi = chiSeriesConfirmed.rename(columns={'Confirmed': "Chine"})
# Keep the days with at least 10 cases and renumber them 0..n-1 so that the
# index counts days.
chiAtLeast10 = cmp10Chi['Chine'] > 9
cmp10Chi = cmp10Chi[chiAtLeast10]
cmp10Chi.index = pd.RangeIndex(len(cmp10Chi.index))
# Align the three countries on their day counter and expose it as a 'Jours' column.
cmp10Global = (
    pd.concat([cmp10Chi, cmp10Fra, cmp10Ita], axis=1)
    .reset_index()
    .rename(columns={'index': "Jours"})
)
# Keep a handle on the wide table before `cmp10Global` is rebound to its long form.
cmp10GlobalShifted = cmp10Global
cmp10Global = cmp10Global.melt(
    id_vars='Jours',
    value_vars=['Chine', 'France', 'Italie'],
    var_name='Legende',
    value_name='Value',
)
# Log-scale line chart, one curve per country.
cmp10Figure = px.line(
    cmp10Global,
    x="Jours",
    y="Value",
    color='Legende',
    log_y=True,
    title="Evolution de l'épidemie à partir de 10 cas infectés",
    color_discrete_sequence=[colorCyan, colorRed, colorYellow],
)
iplot(cmp10Figure)
# En décalant les données de la France de 17 jours, on s'aperçoit que l'on suit exactement la même courbe d'évolution des cas infectés que l'Italie.
# Si on décale aussi la courbe de l'Italie, celle-ci se rapproche elle aussi sensiblement de l'évolution qu'a connue la Chine.
# Same comparison, with the French curve moved 17 rows earlier; the copy keeps
# the unshifted wide table intact.
tmp10GlobalShifted = cmp10GlobalShifted.copy()
tmp10GlobalShifted['France'] = tmp10GlobalShifted['France'].shift(-17)
# Long format for plotting: one row per (Jours, Legende) pair.
fig10GlobalShifted = tmp10GlobalShifted.melt(
    id_vars='Jours',
    value_vars=['Chine', 'France', 'Italie'],
    var_name='Legende',
    value_name='Value',
)
# Log-scale line chart, one curve per country.
shiftedFigure = px.line(
    fig10GlobalShifted,
    x="Jours",
    y="Value",
    color='Legende',
    log_y=True,
    title="Evolution de l'épidemie à partir de 10 cas infectés",
    color_discrete_sequence=[colorCyan, colorRed, colorYellow],
)
iplot(shiftedFigure)
# Source des données : https://github.com/CSSEGISandData/COVID-19