Documentos de Académico
Documentos de Profesional
Documentos de Cultura
CARRERA:
ING. ELÉCTRICA
ASIGNATURA:
PAQUETES ESTADÍSTICOS
TEMA:
REGRESIÓN LINEAL MÚLTIPLE
INTEGRANTES:
HIDALGO IVAN
PAULA JONATHAN
CURSO:
10mo
PARALELO:
“A”
DOCENTE:
ING. CARLOS ROA
QUEVEDO-ECUADOR
AÑO:
2020-2021
#INICIALMENTE ACCEDEMOS A LA LIBRERÍA ISLR
> library(MASS)
> library(ISLR)
# se debe instalar previamente la librería “ISLR”
✓ La variable MEDV representa la mediana del valor de las viviendas
ocupadas por sus propietarios (en miles de dólares)
#DAMOS A CONOCER LOS VALORES DE “BOSTON”
> data("Boston")
> str(Boston)
'data.frame': 506 obs. of 14 variables:
$ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
$ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
$ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
$ chas : int 0 0 0 0 0 0 0 0 0 0 ...
$ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
$ rm : num 6.58 6.42 7.18 7 7.15 ...
$ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
$ dis : num 4.09 4.97 4.97 6.06 6.06 ...
$ rad : int 1 2 2 3 3 3 5 5 5 5 ...
$ tax : num 296 242 242 222 222 222 311 311 311 311 ...
$ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
$ black : num 397 397 393 395 397 ...
$ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
$ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# COMO PRIMER PUNTO VISUALIZAMOS EN FORMA GRÁFICA TODOS LOS DATOS
DE “BOSTON”
Residuals:
Min 1Q Median 3Q Max
-16.957 -5.449 -2.007 2.512 29.800
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 24.03311 0.40914 58.74 <2e-16 ***
crim -0.41519 0.04389 -9.46 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Call:
lm(formula = medv ~ crim + zn, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.421 -5.060 -1.558 2.121 30.765
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 22.48563 0.44173 50.904 < 2e-16 ***
crim -0.35208 0.04259 -8.267 1.24e-15 ***
zn 0.11611 0.01571 7.392 6.09e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Call:
lm(formula = medv ~ crim + zn + indus, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-13.070 -4.733 -1.585 2.648 32.423
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.39465 0.86484 31.676 < 2e-16 ***
crim -0.24863 0.04391 -5.662 2.52e-08 ***
zn 0.05850 0.01750 3.344 0.000889 ***
indus -0.41558 0.06378 -6.515 1.77e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Call:
lm(formula = medv ~ crim + zn + indus + chas, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-12.805 -4.618 -1.472 2.594 32.969
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.11280 0.84523 32.077 < 2e-16 ***
crim -0.22880 0.04299 -5.322 1.55e-07 ***
zn 0.05929 0.01706 3.474 0.000557 ***
indus -0.44033 0.06239 -7.057 5.68e-12 ***
chas 6.89406 1.33266 5.173 3.33e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-12.717 -4.681 -1.462 2.616 32.891
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 29.48994 2.22435 13.258 < 2e-16 ***
crim -0.21852 0.04389 -4.979 8.83e-07 ***
zn 0.05511 0.01744 3.160 0.00167 **
indus -0.38348 0.07944 -4.827 1.84e-06 ***
chas 7.02622 1.33712 5.255 2.20e-07 ***
nox -5.42466 4.69551 -1.155 0.24852
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# medv como variable dependiente y crim + zn + indus + chas + nox + rm como variables
independientes
> regresion6 = lm ( medv ~ crim + zn + indus + chas + nox + rm, data=Boston)
> summary(regresion6)
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-21.016 -3.420 -0.684 2.506 39.467
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -17.95464 3.21376 -5.587 3.81e-08 ***
crim -0.17691 0.03459 -5.114 4.50e-07 ***
zn 0.02128 0.01385 1.537 0.1249
indus -0.14365 0.06394 -2.247 0.0251 *
chas 4.78468 1.05909 4.518 7.81e-06 ***
nox -7.18489 3.69353 -1.945 0.0523 .
rm 7.34159 0.41720 17.597 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age,
data = Boston)
Residuals:
Min 1Q Median 3Q Max
-21.471 -3.254 -0.680 2.188 39.882
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -18.26493 3.21458 -5.682 2.27e-08 ***
crim -0.17276 0.03464 -4.988 8.45e-07 ***
zn 0.01421 0.01451 0.979 0.3278
indus -0.13090 0.06433 -2.035 0.0424 *
chas 4.84073 1.05802 4.575 6.01e-06 ***
nox -4.35741 4.08878 -1.066 0.2871
rm 7.38636 0.41748 17.693 < 2e-16 ***
age -0.02362 0.01476 -1.601 0.1100
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# medv como variable dependiente y crim + zn + indus + chas + nox + rm + age + dis como variables
independientes
regresion8 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis, data=Boston)
> summary(regresion8)
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-19.677 -3.136 -0.679 2.111 37.696
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.82748 3.97461 0.208 0.835164
crim -0.19779 0.03303 -5.987 4.09e-09 ***
zn 0.06099 0.01512 4.034 6.34e-05 ***
indus -0.22573 0.06234 -3.621 0.000324 ***
chas 4.57760 1.00452 4.557 6.54e-06 ***
nox -14.45153 4.10695 -3.519 0.000473 ***
rm 6.75235 0.40507 16.670 < 2e-16 ***
age -0.05564 0.01464 -3.801 0.000162 ***
dis -1.76031 0.23496 -7.492 3.12e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-19.337 -3.138 -0.731 2.063 38.228
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.15539 4.02673 0.039 0.969233
crim -0.17804 0.03812 -4.670 3.88e-06 ***
zn 0.06095 0.01512 4.032 6.41e-05 ***
indus -0.21004 0.06414 -3.275 0.001132 **
chas 4.53665 1.00522 4.513 7.99e-06 ***
nox -13.34267 4.24337 -3.144 0.001764 **
rm 6.79118 0.40676 16.696 < 2e-16 ***
age -0.05626 0.01465 -3.841 0.000139 ***
dis -1.74830 0.23522 -7.433 4.70e-13 ***
rad -0.04529 0.04364 -1.038 0.299901
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.63 on 496 degrees of freedom
Multiple R-squared: 0.6319, Adjusted R-squared: 0.6253 EL VALOR DE “R² AJUSTADO” = 62.5%
F-statistic: 94.63 on 9 and 496 DF, p-value: < 2.2e-16
# medv como variable dependiente y crim + zn + indus + chas + nox + rm + age + dis + rad + tax
como variables independientes
regresion10 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax, data=Boston)
> summary(regresion10)
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad + tax, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-18.607 -2.970 -0.759 2.044 38.100
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.990787 4.082317 0.733 0.464137
crim -0.179554 0.037761 -4.755 2.61e-06 ***
zn 0.071456 0.015317 4.665 3.98e-06 ***
indus -0.104377 0.071342 -1.463 0.144085
chas 4.110667 1.004190 4.094 4.96e-05 ***
nox -12.591596 4.209226 -2.991 0.002915 **
rm 6.664084 0.404771 16.464 < 2e-16 ***
age -0.054668 0.014518 -3.766 0.000186 ***
dis -1.727933 0.233063 -7.414 5.35e-13 ***
rad 0.159263 0.076267 2.088 0.037288 *
tax -0.014341 0.004405 -3.255 0.001210 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.576 on 495 degrees of freedom
Multiple R-squared: 0.6397, Adjusted R-squared: 0.6324 EL VALOR DE “R² AJUSTADO” = 63.2%
F-statistic: 87.87 on 10 and 495 DF, p-value: < 2.2e-16
# medv como variable dependiente y crim + zn + indus + chas + nox + rm + age + dis + rad + tax +
ptratio como variables independientes
> regresion11 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio,
data=Boston)
> summary(regresion11)
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad + tax + ptratio, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.846 -2.749 -0.624 1.994 37.089
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.152368 5.290506 5.132 4.12e-07 ***
crim -0.184032 0.036162 -5.089 5.12e-07 ***
zn 0.039100 0.015424 2.535 0.011551 *
indus -0.042324 0.068920 -0.614 0.539425
chas 3.487528 0.965890 3.611 0.000337 ***
nox -22.182110 4.271529 -5.193 3.03e-07 ***
rm 6.075744 0.397168 15.298 < 2e-16 ***
age -0.045188 0.013971 -3.234 0.001300 **
dis -1.583852 0.224166 -7.066 5.47e-12 ***
rad 0.254722 0.074371 3.425 0.000666 ***
tax -0.012213 0.004229 -2.887 0.004053 **
ptratio -0.996206 0.146998 -6.777 3.50e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.339 on 494 degrees of freedom
Multiple R-squared: 0.6703, Adjusted R-squared: 0.663 EL VALOR DE “R² AJUSTADO” = 66.3%
F-statistic: 91.31 on 11 and 494 DF, p-value: < 2.2e-16
# medv como variable dependiente y crim + zn + indus + chas + nox + rm + age + dis + rad + tax +
ptratio + black como variables independientes
> regresion12 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio +
black, data=Boston)
> summary(regresion12)
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad + tax + ptratio + black, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-16.473 -2.622 -0.824 1.869 36.228
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 20.652628 5.367759 3.848 0.000135 ***
crim -0.159939 0.035803 -4.467 9.84e-06 ***
zn 0.038874 0.015111 2.573 0.010386 *
indus -0.027922 0.067592 -0.413 0.679716
chas 3.216569 0.948069 3.393 0.000748 ***
nox -20.484560 4.200663 -4.877 1.46e-06 ***
rm 6.123072 0.389238 15.731 < 2e-16 ***
age -0.045932 0.013688 -3.356 0.000853 ***
dis -1.554912 0.219703 -7.077 5.08e-12 ***
rad 0.281575 0.073089 3.852 0.000132 ***
tax -0.011738 0.004145 -2.832 0.004815 **
ptratio -1.014223 0.144066 -7.040 6.48e-12 ***
black 0.013621 0.002925 4.657 4.14e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
MÉTODO “FORWARD”
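# PARA PODER REALIZAR ESTE MÉTODO SE TUVO QUE CARGAR NUEVAMENTE LA
LIBRERÍA “ISLR” CON LOS DATOS DE BOSTON, ADEMÁS DE LA LIBRERÍA “LEAPS”,
QUE CONTIENE LA FUNCIÓN regsubsets. NOTA: EN LA LLAMADA SIGUIENTE LA
VARIABLE DE RESPUESTA ES Boston[,1] (crim) Y medv ENTRA COMO PREDICTOR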
> regresionForward<-regsubsets(as.matrix(Boston[,-1]),Boston[,1], nbest = 1, nvmax = NULL,
method='forward')
> summary(regresionForward)
Subset selection object
13 Variables (and intercept)
Forced in Forced out
zn FALSE FALSE
indus FALSE FALSE
chas FALSE FALSE
nox FALSE FALSE
rm FALSE FALSE
age FALSE FALSE
dis FALSE FALSE
rad FALSE FALSE
tax FALSE FALSE
ptratio FALSE FALSE
black FALSE FALSE
lstat FALSE FALSE
medv FALSE FALSE
1 subsets of each size up to 13
Selection Algorithm: forward
zn indus chas nox rm age dis rad tax ptratio black lstat medv
1 ( 1 ) " " " " " " " " " " " " " " "*" " " " " " " " " " "
2 ( 1 ) " " " " " " " " " " " " " " "*" " " " " " " "*" " "
3 ( 1 ) " " " " " " " " " " " " " " "*" " " " " "*" "*" " "
4 ( 1 ) " " " " " " " " " " " " " " "*" " " " " "*" "*" "*"
5 ( 1 ) "*" " " " " " " " " " " " " "*" " " " " "*" "*" "*"
6 ( 1 ) "*" " " " " " " " " " " "*" "*" " " " " "*" "*" "*"
7 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " " " "*" "*" "*"
8 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " "*" "*" "*" "*"
9 ( 1 ) "*" "*" " " "*" " " " " "*" "*" " " "*" "*" "*" "*"
10 ( 1 ) "*" "*" " " "*" "*" " " "*" "*" " " "*" "*" "*" "*"
11 ( 1 ) "*" "*" " " "*" "*" " " "*" "*" "*" "*" "*" "*" "*"
12 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*" "*" "*" "*" "*" "*"
13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
> summary(regresionForward)$rsq
[1] 0.3912567 0.4207965 0.4286123 0.4313872 0.4343367 0.4417256 0.4472745 0.4504606
0.4524408 0.4530572 0.4535605 0.4540031 0.4540104
> summary(regresionForward)$rss
[1] 22744.61 21640.91 21348.88 21245.21 21135.00 20858.93 20651.60 20532.56 20458.58
20435.55 20416.74 20400.20 20399.93
MÉTODO “STEP”
MÉTODOS DE CONSTRUCCIÓN DE LA ECUACIÓN DE
REGRESIÓN
#COMO ÚLTIMO PASO UTILIZAMOS EL ÚLTIMO MÉTODO, QUE ES EL “STEP”; COMO SE
HIZO EN LOS MÉTODOS ANTERIORES, CARGAMOS LA LIBRERÍA Y POSTERIORMENTE LOS
DATOS DE “BOSTON”
# POSTERIORMENTE DEBEMOS CALCULAR LA CORRELACIÓN ENTRE TODAS LAS
VARIABLES DISPONIBLES
> library(dplyr)
> datos <- as.data.frame(Boston)
> round(cor(x = datos, method = "pearson"), 4)
crim zn indus chas nox rm age dis rad tax ptratio black lstat medv
crim 1.0000 -0.2005 0.4066 -0.0559 0.4210 -0.2192 0.3527 -0.3797 0.6255 0.5828 0.2899 -0.3851 0.4556 -0.3883
zn -0.2005 1.0000 -0.5338 -0.0427 -0.5166 0.3120 -0.5695 0.6644 -0.3119 -0.3146 -0.3917 0.1755 -0.4130 0.3604
indus 0.4066 -0.5338 1.0000 0.0629 0.7637 -0.3917 0.6448 -0.7080 0.5951 0.7208 0.3832 -0.3570 0.6038 -0.4837
chas -0.0559 -0.0427 0.0629 1.0000 0.0912 0.0913 0.0865 -0.0992 -0.0074 -0.0356 -0.1215 0.0488 -0.0539 0.1753
nox 0.4210 -0.5166 0.7637 0.0912 1.0000 -0.3022 0.7315 -0.7692 0.6114 0.6680 0.1889 -0.3801 0.5909 -0.4273
rm -0.2192 0.3120 -0.3917 0.0913 -0.3022 1.0000 -0.2403 0.2052 -0.2098 -0.2920 -0.3555 0.1281 -0.6138 0.6954
age 0.3527 -0.5695 0.6448 0.0865 0.7315 -0.2403 1.0000 -0.7479 0.4560 0.5065 0.2615 -0.2735 0.6023 -0.3770
dis -0.3797 0.6644 -0.7080 -0.0992 -0.7692 0.2052 -0.7479 1.0000 -0.4946 -0.5344 -0.2325 0.2915 -0.4970 0.2499
rad 0.6255 -0.3119 0.5951 -0.0074 0.6114 -0.2098 0.4560 -0.4946 1.0000 0.9102 0.4647 -0.4444 0.4887 -0.3816
tax 0.5828 -0.3146 0.7208 -0.0356 0.6680 -0.2920 0.5065 -0.5344 0.9102 1.0000 0.4609 -0.4418 0.5440 -0.4685
ptratio 0.2899 -0.3917 0.3832 -0.1215 0.1889 -0.3555 0.2615 -0.2325 0.4647 0.4609 1.0000 -0.1774 0.3740 -0.5078
black -0.3851 0.1755 -0.3570 0.0488 -0.3801 0.1281 -0.2735 0.2915 -0.4444 -0.4418 -0.1774 1.0000 -0.3661 0.3335
lstat 0.4556 -0.4130 0.6038 -0.0539 0.5909 -0.6138 0.6023 -0.4970 0.4887 0.5440 0.3740 -0.3661 1.0000 -0.7377
medv -0.3883 0.3604 -0.4837 0.1753 -0.4273 0.6954 -0.3770 0.2499 -0.3816 -0.4685 -0.5078 0.3335 -0.7377 1.0000
> chooseCRANmirror()
> utils:::menuInstallPkgs()
# PARA PODER ELABORAR LOS GRÁFICOS QUE MUESTRAN LOS DIAGRAMAS DE
DISPERSIÓN Y LOS VALORES DE CORRELACIÓN ENTRE TODAS LAS VARIABLES DE
LA BASE DE DATOS ANALIZADA
# SE OPTA POR INSTALAR LA LIBRERÍA “GGALLY”
> library(ggplot2)
> library(GGally)
> ggpairs(datos, lower = list(continuous = "smooth"),
+ diag = list(continuous = "barDiag"), axisLabels = "none")
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad + tax + ptratio + black + lstat, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.595 -2.730 -0.518 1.777 26.199
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.646e+01 5.103e+00 7.144 3.28e-12 ***
crim -1.080e-01 3.286e-02 -3.287 0.001087 **
zn 4.642e-02 1.373e-02 3.382 0.000778 ***
indus 2.056e-02 6.150e-02 0.334 0.738288
chas 2.687e+00 8.616e-01 3.118 0.001925 **
nox -1.777e+01 3.820e+00 -4.651 4.25e-06 ***
rm 3.810e+00 4.179e-01 9.116 < 2e-16 ***
age 6.922e-04 1.321e-02 0.052 0.958229
dis -1.476e+00 1.995e-01 -7.398 6.01e-13 ***
rad 3.060e-01 6.635e-02 4.613 5.07e-06 ***
tax -1.233e-02 3.760e-03 -3.280 0.001112 **
ptratio -9.527e-01 1.308e-01 -7.283 1.31e-12 ***
black 9.312e-03 2.686e-03 3.467 0.000573 ***
lstat -5.248e-01 5.072e-02 -10.347 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Step: AIC=1587.65
medv ~ crim + zn + indus + chas + nox + rm + dis + rad + tax +
ptratio + black + lstat
> library(ggplot2)
> library(gridExtra)
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine