LAB SEM 3 GRUPO H PRIMER PARTE Ver 1 1 Finish

GRUPO H
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas
from google.colab import files

upload =files.upload()
<IPython.core.display.HTML object>
Saving fallecidos_covid19_Corregido.csv to
fallecidos_covid19_Corregido.csv
# Load the COVID-19 deaths dataset: semicolon-separated, read as Latin-1.
data = pd.read_csv(
    "fallecidos_covid19_Corregido.csv",
    sep=';',
    encoding='latin-1',
)
# Summary statistics of the numeric columns.
data.describe()
Year Month Day contar_fall

contar_pos \
count 44201.000000 44201.000000 44201.000000 44201.000000
44201.000000
mean 2020.510848 5.459492 15.860659 2.014095
6.268908
std 0.499888 2.641368 8.789915 1.977751
10.858368
min 2020.000000 1.000000 1.000000 1.000000
1.000000
25% 2020.000000 4.000000 8.000000 1.000000
1.000000
50% 2021.000000 5.000000 16.000000 1.000000
3.000000
75% 2021.000000 7.000000 23.000000 2.000000
7.000000
max 2021.000000 12.000000 31.000000 30.000000
369.000000
Riesgo_distrito Nombre_Año
count 44201.000000 44201.000000
mean 2.493337 2020.510848
std 1.118502 0.499888
min 1.000000 2020.000000
25% 1.000000 2020.000000
50% 2.000000 2021.000000
75% 3.000000 2021.000000
max 4.000000 2021.000000
data.head()
DEPARTAMENTO PROVINCIA DISTRITO SEXO Year Month

Day \
0 AMAZONAS BAGUA BAGUA FEMENINO 2020 7 20
1 AMAZONAS BAGUA BAGUA MASCULINO 2021 4 23
2 AMAZONAS UTCUBAMBA BAGUA GRANDE MASCULINO 2021 2 3
3 ANCASH CASMA CASMA MASCULINO 2020 5 8
4 ANCASH CASMA CASMA MASCULINO 2021 4 7
contar_fall METODODX contar_pos Riesgo_distrito Fecha

Nombre_Mes \
0 1 PR 4 4 20/07/2020
Julio
1 2 PR 1 4 23/04/2021
Abril
2 1 PR 1 4 3/02/2021
Febrero
3 1 PR 2 3 8/05/2020
Mayo
4 1 PR 1 2 7/04/2021
Abril
Nombre_Dia Nombre_Año
0 lunes 2020
1 viernes 2021
2 miércoles 2021
3 viernes 2020
4 miércoles 2021
data.tail()
DEPARTAMENTO PROVINCIA DISTRITO SEXO Year

Month \
44196 UCAYALI CORONEL PORTILLO MANANTAY FEMENINO 2021
4
44197 UCAYALI CORONEL PORTILLO MANANTAY MASCULINO 2020
6
44198 UCAYALI CORONEL PORTILLO YARINACOCHA FEMENINO 2020
5
44199 UCAYALI CORONEL PORTILLO YARINACOCHA FEMENINO 2020
10
44200 UCAYALI PADRE ABAD PADRE ABAD MASCULINO 2020
5
Day contar_fall METODODX contar_pos Riesgo_distrito
Fecha \
44196 8 3 AG 1 2
8/04/2021
44197 1 1 PR 13 2
1/06/2020
44198 6 5 PR 4 3
6/05/2020
44199 11 1 PR 2 4
11/10/2020
44200 19 1 PR 2 1
19/05/2020
Nombre_Mes Nombre_Dia Nombre_Año

44196 Abril jueves 2021
44197 Junio lunes 2020
44198 Mayo miércoles 2020
44199 Octubre domingo 2020
44200 Mayo martes 2020
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44201 entries, 0 to 44200
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 DEPARTAMENTO 44201 non-null object
1 PROVINCIA 44201 non-null object
2 DISTRITO 44201 non-null object
3 SEXO 44201 non-null object
4 Year 44201 non-null int64
5 Month 44201 non-null int64
6 Day 44201 non-null int64
7 contar_fall 44201 non-null int64
8 METODODX 44201 non-null object
9 contar_pos 44201 non-null int64
10 Riesgo_distrito 44201 non-null int64
11 Fecha 44201 non-null object
12 Nombre_Mes 44201 non-null object
13 Nombre_Dia 44201 non-null object
14 Nombre_Año 44201 non-null int64
dtypes: int64(7), object(8)
memory usage: 5.1+ MB
data.columns
Index(['DEPARTAMENTO', 'PROVINCIA', 'DISTRITO', 'SEXO', 'Year',

'Month', 'Day',
'contar_fall', 'METODODX', 'contar_pos', 'Riesgo_distrito',
'Fecha',
'Nombre_Mes', 'Nombre_Dia', 'Nombre_Año'],
dtype='object')
sns.heatmap(data.isnull(),yticklabels=False,cbar=False,cmap='viridis')
<AxesSubplot:>
data.isnull().sum()
DEPARTAMENTO 0
PROVINCIA 0
DISTRITO 0
SEXO 0
Year 0
Month 0
Day 0
contar_fall 0
METODODX 0
contar_pos 0
Riesgo_distrito 0
Fecha 0
Nombre_Mes 0
Nombre_Dia 0
Nombre_Año 0
dtype: int64
# Number of rows per department, most frequent first.
# Series.value_counts replaces the deprecated top-level pd.value_counts.
superdepartamento = data['DEPARTAMENTO'].value_counts()
superdepartamento
AREQUIPA 4424
PIURA 4204
LA LIBERTAD 3575
JUNIN 3271
ICA 2987
LAMBAYEQUE 2967
LIMA REGION 2800
CALLAO 2623
ANCASH 2358
CUSCO 2034
CAJAMARCA 1577
LORETO 1358
SAN MARTIN 1266
PUNO 1252
UCAYALI 1246
TACNA 1025
HUANUCO 978
AYACUCHO 789
MOQUEGUA 673
APURIMAC 585
TUMBES 573
AMAZONAS 508
PASCO 396
MADRE DE DIOS 378
HUANCAVELICA 354
Name: DEPARTAMENTO, dtype: int64
# Number of rows per province. Bound to a descriptive name instead of
# `super`, which would shadow the builtin.
provincia_counts = data['PROVINCIA'].value_counts()
provincia_counts
AREQUIPA 3836
CALLAO 2623
TRUJILLO 2543
PIURA 2258
CHICLAYO 2193
...
PAUCARTAMBO 1
CANDARAVE 1
VILCAS HUAMAN 1
SUCRE 1
ANTONIO RAIMONDI 1
Name: PROVINCIA, Length: 186, dtype: int64
# Number of rows per SEXO value. Bound to a descriptive name instead of
# `super`, which would shadow the builtin.
sexo_counts = data['SEXO'].value_counts()
sexo_counts
MASCULINO 23504
FEMENINO 20697
Name: SEXO, dtype: int64
# Number of rows per month name. Bound to a descriptive name instead of
# `super`, which would shadow the builtin.
mes_counts = data['Nombre_Mes'].value_counts()
mes_counts
Mayo 6434
Junio 6132
Abril 5748
Agosto 4850
Julio 4515
Marzo 4152
Febrero 4056
Enero 2761
Setiembre 2552
Octubre 1269
Diciembre 917
Noviembre 815
Name: Nombre_Mes, dtype: int64
# Number of rows per year. Bound to a descriptive name instead of
# `super`, which would shadow the builtin.
anio_counts = data['Nombre_Año'].value_counts()
anio_counts
2021 22580
2020 21621
Name: Nombre_Año, dtype: int64
# Bar chart with the number of rows per department.
plt.figure(figsize=(10, 5))
sns.countplot(x='DEPARTAMENTO', data=data, palette='rainbow')
# Rotate the x tick labels 90 degrees.
plt.xticks(rotation=90)
plt.show()

# Frequency table: absolute counts plus relative frequency in percent.
superdepartamento = pd.DataFrame(superdepartamento)
superdepartamento.columns = ["Frec_abs"]
superdepartamento["Frec_rel_%"] = (
    100 * superdepartamento["Frec_abs"] / len(data)
)

# Add up the relative frequencies; the total is kept in `valor_ac`.
valor_ac = 0
Frec_rel_val = superdepartamento["Frec_rel_%"].values
for i in Frec_rel_val:
    valor_ac = valor_ac + i
superdepartamento
Frec_abs Frec_rel_%
AREQUIPA 4424 10.008823
PIURA 4204 9.511097
LA LIBERTAD 3575 8.088052
JUNIN 3271 7.400285
ICA 2987 6.757766
LAMBAYEQUE 2967 6.712518
LIMA REGION 2800 6.334698
CALLAO 2623 5.934255
ANCASH 2358 5.334721
CUSCO 2034 4.601706
CAJAMARCA 1577 3.567793
LORETO 1358 3.072329
SAN MARTIN 1266 2.864189
PUNO 1252 2.832515
UCAYALI 1246 2.818941
TACNA 1025 2.318952
HUANUCO 978 2.212620
AYACUCHO 789 1.785027
MOQUEGUA 673 1.522590
APURIMAC 585 1.323499
TUMBES 573 1.296351
AMAZONAS 508 1.149295
PASCO 396 0.895907
MADRE DE DIOS 378 0.855184
HUANCAVELICA 354 0.800887
Gráfico de torta para un atributo cualitativo nominal, con etiquetas

# Pie chart of the relative frequency per department. Slice labels come
# from the table index, so they stay in step with the rows instead of
# relying on a hand-typed list.
superdepartamento['Frec_rel_%'].plot(
    kind='pie',
    labels=list(superdepartamento.index),
    autopct='%.2f',
    title='DEPARTAMENTO',
    figsize=(20, 20),
)
<AxesSubplot:title={'center':'DEPARTAMENTO'}, ylabel='Frec_rel_%'>
# Exploded pie chart of the relative frequency per department.
# Labels come from the table index so they always match the plotted rows.
labels = list(superdepartamento.index)
# Offset of each slice from the centre; one entry per slice (25 in total).
explode = [0.1, 0.2, 0.3, 0.4, 0.3, 0.2, 0.1, 0.2, 0.3, 0.4, 0.3,
           0.2, 0.1, 0.2, 0.3, 0.4, 0.3, 0.2, 0.1, 0.2, 0.3, 0.4, 0.3,
           0.2, 0.1]
colors = sns.color_palette('pastel')
# The figure must be created BEFORE drawing. Calling plt.figure() after
# plt.show() only opens a new, empty figure.
plt.figure(figsize=(20, 20))
plt.pie(superdepartamento['Frec_rel_%'], labels=labels, colors=colors,
        autopct='%0.0f%%', explode=explode)
plt.show()
<Figure size 2000x2000 with 0 Axes>
Gráfico de torta para un atributo cualitativo nominal, sin etiquetas

# Number of rows per Riesgo_distrito value. Bound to a descriptive name
# instead of `super`, which would shadow the builtin.
riesgo_counts = data['Riesgo_distrito'].value_counts()
riesgo_counts
2 11159
1 11120
4 11004
3 10918
Name: Riesgo_distrito, dtype: int64
# Number of rows per contar_pos value. Bound to a descriptive name
# instead of `super`, which would shadow the builtin.
positivos_counts = data['contar_pos'].value_counts()
positivos_counts
1 14063
2 7387
3 4617
4 3007
5 2232
...
158 1
292 1
115 1
227 1
130 1
Name: contar_pos, Length: 147, dtype: int64
# Number of rows per METODODX value.
# NOTE(review): `super` shadows the builtin; the name is kept because the
# following cells read it. Rename it everywhere in a follow-up.
super = data['METODODX'].value_counts()
super
PR 35277
AG 8924
Name: METODODX, dtype: int64
# Frequency table for METODODX: absolute counts plus relative frequency (%).
super = pd.DataFrame(super)
super.columns = ["Frec_abs"]
super["Frec_rel_%"] = 100 * super["Frec_abs"] / len(data)
# Total of the relative frequencies. The bare `valor_ac + i` statement had
# no loop around it and added a stale `i` left over from an earlier cell.
Frec_rel_val = super["Frec_rel_%"].values
valor_ac = Frec_rel_val.sum()
super
Frec_abs Frec_rel_%
PR 35277 79.810412
AG 8924 20.189588
# Pie chart of the relative frequency of each METODODX value.
frec_rel_metodo = super['Frec_rel_%']
frec_rel_metodo.plot(
    kind='pie',
    labels=['PR', 'AG'],
    autopct='%.2f',
    title='METODODX',
    figsize=(10, 10),
)
<AxesSubplot:title={'center':'METODODX'}, ylabel='Frec_rel_%'>
# Exploded pie chart of the METODODX relative frequencies.
labels = ['PR', 'AG']
explode = [0.1, 0.2]  # offset of each slice from the centre
colors = sns.color_palette('pastel')
plt.pie(
    super['Frec_rel_%'],
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%0.0f%%',
)
plt.show()
dato cualitativo ordinal
# Number of rows per SEXO value.
# NOTE(review): `super` shadows the builtin; the name is kept because the
# following cells read it. Rename it everywhere in a follow-up.
super = data['SEXO'].value_counts()
super
MASCULINO 23504
FEMENINO 20697
Name: SEXO, dtype: int64
# Frequency table for SEXO: absolute counts, relative frequency (%) and
# cumulative relative frequency (%).
super = pd.DataFrame(super)
super.columns = ["Frec_abs"]
super["Frec_rel_%"] = 100 * super["Frec_abs"] / len(data)
# Running total, row by row. cumsum replaces the hand-written accumulate
# and append statements, which had no loop around them.
super["Frec_rel_%_acum"] = super["Frec_rel_%"].cumsum()
super
Frec_abs Frec_rel_% Frec_rel_%_acum

MASCULINO 23504 53.175268 53.175268
FEMENINO 20697 46.824732 100.000000
# Relative frequency (%) per SEXO category, read from the frequency table.
freq_series = super['Frec_rel_%']
# Tick labels come from the index so they cannot drift from the bar order.
x_labels = list(freq_series.index)

# Bar chart with one bar per category. `ax` is kept for later annotation.
ax = freq_series.plot(kind='bar')
# Title and axis labels describe the plotted data; the previous texts
# ('Amount Frequency', 'Amount ($)') came from an unrelated example.
ax.set_title('Frecuencia relativa por SEXO')
ax.set_xlabel('SEXO')
ax.set_ylabel('Frecuencia relativa (%)')
ax.set_xticklabels(x_labels)
def add_value_labels(ax, spacing=5):
    """Add a value label to the end of each bar in a bar chart.

    Arguments:
        ax (matplotlib.axes.Axes): The axes whose bars (``ax.patches``)
            are annotated.
        spacing (int): The distance, in points, between each label and the
            end of its bar.
    """
    for rect in ax.patches:
        # Anchor point: the horizontal centre of the bar, at its end.
        y_value = rect.get_height()
        x_value = rect.get_x() + rect.get_width() / 2

        if y_value < 0:
            # Negative bar: shift the label downwards and align its top
            # edge with the anchor so it sits below the bar.
            space = -spacing
            va = 'top'
        else:
            # Zero or positive bar: place the label above the bar.
            space = spacing
            va = 'bottom'

        ax.annotate(
            f"{y_value:.1f}",            # value with one decimal place
            (x_value, y_value),          # anchor at the end of the bar
            xytext=(0, space),           # vertical shift by `space`
            textcoords="offset points",  # interpret `xytext` in points
            ha='center',                 # horizontally centre the label
            va=va,                       # depends on the sign of the bar
        )
# Label every bar of `ax` with its value, then save the current figure
# to "image.png".
add_value_labels(ax)
plt.savefig("image.png")
sns.countplot(x="METODODX", hue="SEXO",data=data,palette='Set3')
<AxesSubplot:xlabel='METODODX', ylabel='count'>
# Row counts per month, one panel per SEXO value, coloured by department.
# catplot is a figure-level function: it creates its own figure, sized by
# `height` and `aspect`. A preceding plt.figure() call would only leave a
# stray empty figure behind, so none is made here.
sns.catplot(x="Nombre_Mes", hue="DEPARTAMENTO", col="SEXO",
            data=data, kind="count",
            height=4, aspect=1)
plt.show()

# Row counts per month, with one bar per METODODX value.
sns.countplot(data=data, x="Nombre_Mes", hue="METODODX", palette='Set2')
plt.show()

# Histogram of the DEPARTAMENTO column.
departamentos = data['DEPARTAMENTO']
departamentos.hist()
plt.show()
# Box plot of contar_pos for each SEXO category.
sns.boxplot(x = 'contar_pos', y = 'SEXO', data = data)
<AxesSubplot:xlabel='contar_pos', ylabel='SEXO'>
# Box plot of contar_fall for each METODODX value.
sns.boxplot(x = 'contar_fall', y = 'METODODX', data = data)
<AxesSubplot:xlabel='contar_fall', ylabel='METODODX'>
# Number of deaths by SEXO (box plot of contar_fall per category).
sns.boxplot(x = 'contar_fall', y = 'SEXO', data = data)
<AxesSubplot:xlabel='contar_fall', ylabel='SEXO'>
# Total contar_fall per department as a stacked bar chart, one segment per
# SEXO value. The chain is parenthesised so it can span several lines;
# with a bare line break, the ['contar_fall'] subscript starts a new
# statement.
(
    data.groupby(['DEPARTAMENTO', 'SEXO'])['contar_fall']
    .sum()
    .unstack()
    .plot(kind='bar', stacked=True, figsize=(15, 10))
)
<AxesSubplot:xlabel='DEPARTAMENTO'>
# Positives per department: total contar_pos as a stacked bar chart, one
# segment per SEXO value. The chain is parenthesised so it can span
# several lines, and figsize is written whole as (15, 10).
(
    data.groupby(['DEPARTAMENTO', 'SEXO'])['contar_pos']
    .sum()
    .unstack()
    .plot(kind='bar', stacked=True, figsize=(15, 10))
)
<AxesSubplot:xlabel='DEPARTAMENTO'>
data['contar_fall'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f5357448290>
data['contar_pos'].hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f5356e6a610>
sns.boxplot(y = 'contar_fall', x = 'Riesgo_distrito', data = data)
<matplotlib.axes._subplots.AxesSubplot at 0x7f5356dd2250>
sns.boxplot(y = 'contar_pos', x = 'Riesgo_distrito', data = data)
<matplotlib.axes._subplots.AxesSubplot at 0x7f5356e64a50>

LAB SEM 3 GRUPO H PRIMER PARTE Ver 1 1 Finish

Cargado por

Información del documento

Título original

Derechos de autor

Formatos disponibles

Compartir este documento

Compartir o incrustar documentos

Opciones para compartir

¿Le pareció útil este documento?

¿Este contenido es inapropiado?

Copyright:

Formatos disponibles

LAB SEM 3 GRUPO H PRIMER PARTE Ver 1 1 Finish

Cargado por

Copyright:

Formatos disponibles

GRUPO H

from google.colab import files

Year Month Day contar_fall

DEPARTAMENTO PROVINCIA DISTRITO SEXO Year Month

1 AMAZONAS BAGUA BAGUA MASCULINO 2021 4 23

2 AMAZONAS UTCUBAMBA BAGUA GRANDE MASCULINO 2021 2 3

3 ANCASH CASMA CASMA MASCULINO 2020 5 8

4 ANCASH CASMA CASMA MASCULINO 2021 4 7

contar_fall METODODX contar_pos Riesgo_distrito Fecha

DEPARTAMENTO PROVINCIA DISTRITO SEXO Year

Nombre_Mes Nombre_Dia Nombre_Año

Index(['DEPARTAMENTO', 'PROVINCIA', 'DISTRITO', 'SEXO', 'Year',

torta para atributo cualitativo nominal con labels

<Figure size 2000x2000 with 0 Axes>

torta para atributo cualitativo nominal sin labels

Frec_abs Frec_rel_% Frec_rel_%_acum

# Bring some raw data.

# In my original code I create a series and run on that,

x_labels = ['MASCULINO', 'FEMENINO']

# Plot the figure.

def add_value_labels(ax, spacing=5):

# For each bar: Place a label

# Number of points between bar and label. Change to your

# If value of bar is negative: Place label below bar

# Use Y value as label and format number with one decimal

# Call the function above. All the magic happens there.

<Figure size 1000x500 with 0 Axes>

También podría gustarte