
UNIVERSIDAD TÉCNICA ESTATAL DE QUEVEDO

FACULTAD DE CIENCIAS DE LA INGENIERÍA

PROGRAM:
ELECTRICAL ENGINEERING

COURSE:
STATISTICAL PACKAGES
TOPIC:
MULTIPLE LINEAR REGRESSION
MEMBERS:
HIDALGO IVAN
PAULA JONATHAN
LEVEL:
10th
SECTION:
“A”
INSTRUCTOR:
ING. CARLOS ROA

QUEVEDO-ECUADOR
ACADEMIC YEAR:

2020-2021
# WE START BY LOADING THE MASS AND ISLR LIBRARIES (THE “Boston” DATA SET COMES WITH MASS)

> library(MASS)
> library(ISLR)
# the “ISLR” package must be installed first
✓ The variable medv is the home value [the median value of owner-occupied homes, in $1000s]
# WE DISPLAY THE CONTENTS OF “BOSTON”

> data("Boston")
> str(Boston)
'data.frame': 506 obs. of 14 variables:
$ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
$ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
$ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
$ chas : int 0 0 0 0 0 0 0 0 0 0 ...
$ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
$ rm : num 6.58 6.42 7.18 7 7.15 ...
$ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
$ dis : num 4.09 4.97 4.97 6.06 6.06 ...
$ rad : int 1 2 2 3 3 3 5 5 5 5 ...
$ tax : num 296 242 242 222 222 222 311 311 311 311 ...
$ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
$ black : num 397 397 393 395 397 ...
$ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
$ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# AS A FIRST STEP WE VISUALIZE ALL OF THE “BOSTON” DATA GRAPHICALLY

# KEEP IN MIND THAT “MEDV” IS THE DEPENDENT VARIABLE AND “CRIM”, “NOX”, “RAD”, “ZN”, “RM”, “TAX”, “INDUS”, “AGE”, “PTRATIO”, ETC. ARE THE INDEPENDENT VARIABLES
# TO OBTAIN THE SCATTER PLOTS BETWEEN EACH INDEPENDENT VARIABLE AND “MEDV” (a sketch is given below)
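A minimal sketch of how those scatter plots could be produced in base R; these plotting calls are illustrative and do not appear in the original transcript:

library(MASS)   # provides the Boston data set
data(Boston)

# scatter plot of the dependent variable medv against one predictor, e.g. crim
plot(Boston$crim, Boston$medv, xlab = "crim", ylab = "medv", main = "medv vs. crim")

# all pairwise scatter plots at once
pairs(Boston)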
> regresion1 = lm ( medv ~ crim, data=Boston)
# medv as the dependent variable and crim as the independent variable
> summary(regresion1)
Call:
lm(formula = medv ~ crim, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-16.957 -5.449 -2.007 2.512 29.800

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 24.03311 0.40914 58.74 <2e-16 ***
crim -0.41519 0.04389 -9.46 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.484 on 504 degrees of freedom


Multiple R-squared: 0.1508, Adjusted R-squared: 0.1491
# ADJUSTED R-SQUARED = 14.91%
F-statistic: 89.49 on 1 and 504 DF, p-value: < 2.2e-16

# medv as the dependent variable and crim + zn as independent variables


> regresion2 = lm ( medv ~ crim + zn, data=Boston)
> summary(regresion2)

Call:
lm(formula = medv ~ crim + zn, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-15.421 -5.060 -1.558 2.121 30.765

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 22.48563 0.44173 50.904 < 2e-16 ***
crim -0.35208 0.04259 -8.267 1.24e-15 ***
zn 0.11611 0.01571 7.392 6.09e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.065 on 503 degrees of freedom


Multiple R-squared: 0.234, Adjusted R-squared: 0.2309
# ADJUSTED R-SQUARED = 23.09%
F-statistic: 76.82 on 2 and 503 DF, p-value: < 2.2e-16
# medv as the dependent variable and crim + zn + indus as independent variables
> regresion3 = lm ( medv ~ crim + zn + indus, data=Boston)
> summary(regresion3)

Call:
lm(formula = medv ~ crim + zn + indus, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-13.070 -4.733 -1.585 2.648 32.423

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.39465 0.86484 31.676 < 2e-16 ***
crim -0.24863 0.04391 -5.662 2.52e-08 ***
zn 0.05850 0.01750 3.344 0.000889 ***
indus -0.41558 0.06378 -6.515 1.77e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.752 on 502 degrees of freedom


Multiple R-squared: 0.2937, Adjusted R-squared: 0.2895
# ADJUSTED R-SQUARED = 28.95%
F-statistic: 69.59 on 3 and 502 DF, p-value: < 2.2e-16
# medv as the dependent variable and crim + zn + indus + chas as independent variables
> regresion4 = lm ( medv ~ crim + zn + indus + chas, data=Boston)
> summary(regresion4)

Call:
lm(formula = medv ~ crim + zn + indus + chas, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-12.805 -4.618 -1.472 2.594 32.969

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.11280 0.84523 32.077 < 2e-16 ***
crim -0.22880 0.04299 -5.322 1.55e-07 ***
zn 0.05929 0.01706 3.474 0.000557 ***
indus -0.44033 0.06239 -7.057 5.68e-12 ***
chas 6.89406 1.33266 5.173 3.33e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.561 on 501 degrees of freedom


Multiple R-squared: 0.3295, Adjusted R-squared: 0.3242
# ADJUSTED R-SQUARED = 32.42%
F-statistic: 61.56 on 4 and 501 DF, p-value: < 2.2e-16
# medv as the dependent variable and crim + zn + indus + chas + nox as independent variables
> regresion5 = lm ( medv ~ crim + zn + indus + chas + nox, data=Boston)
> summary(regresion5)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-12.717 -4.681 -1.462 2.616 32.891

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 29.48994 2.22435 13.258 < 2e-16 ***
crim -0.21852 0.04389 -4.979 8.83e-07 ***
zn 0.05511 0.01744 3.160 0.00167 **
indus -0.38348 0.07944 -4.827 1.84e-06 ***
chas 7.02622 1.33712 5.255 2.20e-07 ***
nox -5.42466 4.69551 -1.155 0.24852
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7.558 on 500 degrees of freedom


Multiple R-squared: 0.3313, Adjusted R-squared: 0.3246
# ADJUSTED R-SQUARED = 32.46%
F-statistic: 49.55 on 5 and 500 DF, p-value: < 2.2e-16

# medv as the dependent variable and crim + zn + indus + chas + nox + rm as independent variables
> regresion6 = lm ( medv ~ crim + zn + indus + chas + nox + rm, data=Boston)
> summary(regresion6)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-21.016 -3.420 -0.684 2.506 39.467

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -17.95464 3.21376 -5.587 3.81e-08 ***
crim -0.17691 0.03459 -5.114 4.50e-07 ***
zn 0.02128 0.01385 1.537 0.1249
indus -0.14365 0.06394 -2.247 0.0251 *
chas 4.78468 1.05909 4.518 7.81e-06 ***
nox -7.18489 3.69353 -1.945 0.0523 .
rm 7.34159 0.41720 17.597 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.943 on 499 degrees of freedom


Multiple R-squared: 0.5874, Adjusted R-squared: 0.5824
# ADJUSTED R-SQUARED = 58.24%
F-statistic: 118.4 on 6 and 499 DF, p-value: < 2.2e-16
# medv as the dependent variable and crim + zn + indus + chas + nox + rm + age as independent variables
> regresion7 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age, data=Boston)
> summary(regresion7)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age,
data = Boston)

Residuals:
Min 1Q Median 3Q Max
-21.471 -3.254 -0.680 2.188 39.882

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -18.26493 3.21458 -5.682 2.27e-08 ***
crim -0.17276 0.03464 -4.988 8.45e-07 ***
zn 0.01421 0.01451 0.979 0.3278
indus -0.13090 0.06433 -2.035 0.0424 *
chas 4.84073 1.05802 4.575 6.01e-06 ***
nox -4.35741 4.08878 -1.066 0.2871
rm 7.38636 0.41748 17.693 < 2e-16 ***
age -0.02362 0.01476 -1.601 0.1100
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.934 on 498 degrees of freedom


Multiple R-squared: 0.5895, Adjusted R-squared: 0.5837
# ADJUSTED R-SQUARED = 58.37%
F-statistic: 102.2 on 7 and 498 DF, p-value: < 2.2e-16

# medv as the dependent variable and crim + zn + indus + chas + nox + rm + age + dis as independent variables
> regresion8 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis, data=Boston)
> summary(regresion8)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-19.677 -3.136 -0.679 2.111 37.696

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.82748 3.97461 0.208 0.835164
crim -0.19779 0.03303 -5.987 4.09e-09 ***
zn 0.06099 0.01512 4.034 6.34e-05 ***
indus -0.22573 0.06234 -3.621 0.000324 ***
chas 4.57760 1.00452 4.557 6.54e-06 ***
nox -14.45153 4.10695 -3.519 0.000473 ***
rm 6.75235 0.40507 16.670 < 2e-16 ***
age -0.05564 0.01464 -3.801 0.000162 ***
dis -1.76031 0.23496 -7.492 3.12e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.63 on 497 degrees of freedom


Multiple R-squared: 0.6311, Adjusted R-squared: 0.6252
# ADJUSTED R-SQUARED = 62.52%
F-statistic: 106.3 on 8 and 497 DF, p-value: < 2.2e-16
# medv as the dependent variable and crim + zn + indus + chas + nox + rm + age + dis + rad as independent variables
> regresion9 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad, data=Boston)
> summary(regresion9)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-19.337 -3.138 -0.731 2.063 38.228

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.15539 4.02673 0.039 0.969233
crim -0.17804 0.03812 -4.670 3.88e-06 ***
zn 0.06095 0.01512 4.032 6.41e-05 ***
indus -0.21004 0.06414 -3.275 0.001132 **
chas 4.53665 1.00522 4.513 7.99e-06 ***
nox -13.34267 4.24337 -3.144 0.001764 **
rm 6.79118 0.40676 16.696 < 2e-16 ***
age -0.05626 0.01465 -3.841 0.000139 ***
dis -1.74830 0.23522 -7.433 4.70e-13 ***
rad -0.04529 0.04364 -1.038 0.299901
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.63 on 496 degrees of freedom
Multiple R-squared: 0.6319, Adjusted R-squared: 0.6253
# ADJUSTED R-SQUARED = 62.53%
F-statistic: 94.63 on 9 and 496 DF, p-value: < 2.2e-16

# medv as the dependent variable and crim + zn + indus + chas + nox + rm + age + dis + rad + tax as independent variables
> regresion10 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax, data=Boston)
> summary(regresion10)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad + tax, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-18.607 -2.970 -0.759 2.044 38.100

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.990787 4.082317 0.733 0.464137
crim -0.179554 0.037761 -4.755 2.61e-06 ***
zn 0.071456 0.015317 4.665 3.98e-06 ***
indus -0.104377 0.071342 -1.463 0.144085
chas 4.110667 1.004190 4.094 4.96e-05 ***
nox -12.591596 4.209226 -2.991 0.002915 **
rm 6.664084 0.404771 16.464 < 2e-16 ***
age -0.054668 0.014518 -3.766 0.000186 ***
dis -1.727933 0.233063 -7.414 5.35e-13 ***
rad 0.159263 0.076267 2.088 0.037288 *
tax -0.014341 0.004405 -3.255 0.001210 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.576 on 495 degrees of freedom
Multiple R-squared: 0.6397, Adjusted R-squared: 0.6324
# ADJUSTED R-SQUARED = 63.24%
F-statistic: 87.87 on 10 and 495 DF, p-value: < 2.2e-16
# medv as the dependent variable and crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio as independent variables
> regresion11 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio,
data=Boston)
> summary(regresion11)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad + tax + ptratio, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-15.846 -2.749 -0.624 1.994 37.089

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.152368 5.290506 5.132 4.12e-07 ***
crim -0.184032 0.036162 -5.089 5.12e-07 ***
zn 0.039100 0.015424 2.535 0.011551 *
indus -0.042324 0.068920 -0.614 0.539425
chas 3.487528 0.965890 3.611 0.000337 ***
nox -22.182110 4.271529 -5.193 3.03e-07 ***
rm 6.075744 0.397168 15.298 < 2e-16 ***
age -0.045188 0.013971 -3.234 0.001300 **
dis -1.583852 0.224166 -7.066 5.47e-12 ***
rad 0.254722 0.074371 3.425 0.000666 ***
tax -0.012213 0.004229 -2.887 0.004053 **
ptratio -0.996206 0.146998 -6.777 3.50e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.339 on 494 degrees of freedom
Multiple R-squared: 0.6703, Adjusted R-squared: 0.663
# ADJUSTED R-SQUARED = 66.30%
F-statistic: 91.31 on 11 and 494 DF, p-value: < 2.2e-16

# medv as the dependent variable and crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio + black as independent variables
> regresion12 = lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio +
black, data=Boston)
> summary(regresion12)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad + tax + ptratio + black, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-16.473 -2.622 -0.824 1.869 36.228

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 20.652628 5.367759 3.848 0.000135 ***
crim -0.159939 0.035803 -4.467 9.84e-06 ***
zn 0.038874 0.015111 2.573 0.010386 *
indus -0.027922 0.067592 -0.413 0.679716
chas 3.216569 0.948069 3.393 0.000748 ***
nox -20.484560 4.200663 -4.877 1.46e-06 ***
rm 6.123072 0.389238 15.731 < 2e-16 ***
age -0.045932 0.013688 -3.356 0.000853 ***
dis -1.554912 0.219703 -7.077 5.08e-12 ***
rad 0.281575 0.073089 3.852 0.000132 ***
tax -0.011738 0.004145 -2.832 0.004815 **
ptratio -1.014223 0.144066 -7.040 6.48e-12 ***
black 0.013621 0.002925 4.657 4.14e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.231 on 493 degrees of freedom


Multiple R-squared: 0.6842, Adjusted R-squared: 0.6765
# ADJUSTED R-SQUARED = 67.65%
F-statistic: 89.01 on 12 and 493 DF, p-value: < 2.2e-16

“FORWARD” METHOD
# TO RUN THIS METHOD, THE “leaps” PACKAGE (WHICH PROVIDES regsubsets()) HAD TO BE LOADED, ALONG WITH THE BOSTON DATA (see the sketch below)
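For reference, a sketch of the packages this step actually relies on: regsubsets() comes from the leaps package, not ISLR; the install line is only needed the first time and is left commented out:

# install.packages("leaps")   # uncomment if leaps is not installed yet
library(leaps)   # provides regsubsets()
library(MASS)    # provides the Boston data set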
> regresionForward<-regsubsets(as.matrix(Boston[,-1]),Boston[,1], nbest = 1, nvmax = NULL,
method='forward')
> summary(regresionForward)
Subset selection object
13 Variables (and intercept)
Forced in Forced out
zn FALSE FALSE
indus FALSE FALSE
chas FALSE FALSE
nox FALSE FALSE
rm FALSE FALSE
age FALSE FALSE
dis FALSE FALSE
rad FALSE FALSE
tax FALSE FALSE
ptratio FALSE FALSE
black FALSE FALSE
lstat FALSE FALSE
medv FALSE FALSE
1 subsets of each size up to 13
Selection Algorithm: forward
zn indus chas nox rm age dis rad tax ptratio black lstat medv
1 ( 1 ) " " " " " " " " " " " " " " "*" " " " " "" "" ""
2 ( 1 ) " " " " " " " " " " " " " " "*" " " " " " " "*" " "
3 ( 1 ) " " " " " " " " " " " " " " "*" " " " " "*" "*" " "
4 ( 1 ) " " " " " " " " " " " " " " "*" " " " " "*" "*" "*"
5 ( 1 ) "*" " " " " " " " " " " " " "*" " " " " "*" "*" "*"
6 ( 1 ) "*" " " " " " " " " " " "*" "*" " " " " "*" "*" "*"
7 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " " " "*" "*" "*"
8 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " "*" "*" "*" "*"
9 ( 1 ) "*" "*" " " "*" " " " " "*" "*" " " "*" "*" "*" "*"
10 ( 1 ) "*" "*" " " "*" "*" " " "*" "*" " " "*" "*" "*" "*"
11 ( 1 ) "*" "*" " " "*" "*" " " "*" "*" "*" "*" "*" "*" "*"
12 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*" "*" "*" "*" "*" "*"
13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"

> summary(regresionForward)$rsq
[1] 0.3912567 0.4207965 0.4286123 0.4313872 0.4343367 0.4417256 0.4472745 0.4504606
0.4524408 0.4530572 0.4535605 0.4540031 0.4540104
> summary(regresionForward)$rss
[1] 22744.61 21640.91 21348.88 21245.21 21135.00 20858.93 20651.60 20532.56 20458.58
20435.55 20416.74 20400.20 20399.93

# NOTE THE FULL 13-VARIABLE MODEL: ITS R-SQUARED IS 45.40%. COMPARING IT WITH THE ADJUSTED R-SQUARED OF THE EARLIER 12-VARIABLE MODEL (67.6%), THERE IS A LARGE DIFFERENCE BETWEEN THE TWO VALUES, SO WE CHOOSE THE MODEL WITH 13 VARIABLES (the fit statistics can also be read off as sketched below)
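As a sketch, the fit statistics of the forward search can be inspected by model size through the summary of the regresionForward object created above; adjr2 is the adjusted R-squared component returned by summary() for regsubsets objects:

res_forward <- summary(regresionForward)

res_forward$rsq                # R-squared of the best subset of each size (1 to 13 predictors)
res_forward$adjr2              # adjusted R-squared of the same subsets
which.max(res_forward$adjr2)   # subset size with the highest adjusted R-squared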
“BACKWARD” METHOD
# IN ADDITION WE RUN THE SECOND METHOD WITH THE SAME DATA USED IN THE PREVIOUS METHOD
> Backward<-regsubsets(as.matrix(Boston[,-1]), Boston[,1], nbest = 1, nvmax = NULL,
method='backward')
> summary(Backward)
Subset selection object
13 Variables (and intercept)
Forced in Forced out
zn FALSE FALSE
indus FALSE FALSE
chas FALSE FALSE
nox FALSE FALSE
rm FALSE FALSE
age FALSE FALSE
dis FALSE FALSE
rad FALSE FALSE
tax FALSE FALSE
ptratio FALSE FALSE
black FALSE FALSE
lstat FALSE FALSE
medv FALSE FALSE
1 subsets of each size up to 13
Selection Algorithm: backward
zn indus chas nox rm age dis rad tax ptratio black lstat medv
1 ( 1 ) " " " " " " " " " " " " " " "*" " " " " "" "" ""
2 ( 1 ) " " " " " " " " " " " " " " "*" " " " " " " " " "*"
3 ( 1 ) " " " " " " " " " " " " "*" "*" " " " " " " " " "*"
4 ( 1 ) "*" " " " " " " " " " " "*" "*" " " " " " " " " "*"
5 ( 1 ) "*" " " " " " " " " " " "*" "*" " " " " "*" " " "*"
6 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " " " "*" " " "*"
7 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " "*" "*" " " "*"
8 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " "*" "*" "*" "*"
9 ( 1 ) "*" "*" " " "*" " " " " "*" "*" " " "*" "*" "*" "*"
10 ( 1 ) "*" "*" " " "*" "*" " " "*" "*" " " "*" "*" "*" "*"
11 ( 1 ) "*" "*" " " "*" "*" " " "*" "*" "*" "*" "*" "*" "*"
12 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*" "*" "*" "*" "*" "*"
13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
> summary(Backward)$adjr2
[1] 0.3900489 0.4151340 0.4187255 0.4289661 0.4336665 0.4373321 0.4398956 0.4416149
0.4425053 0.4420078 0.4413928 0.4407131 0.4395838
44.07 % IS THE ADJUSTED R-SQUARED FOR THE REGRESSION MODEL WITH 12 INDEPENDENT VARIABLES
43.95 % IS THE ADJUSTED R-SQUARED FOR THE REGRESSION MODEL WITH 13 INDEPENDENT VARIABLES
# SINCE THE ADJUSTED R-SQUARED DROPS FROM 44.07 % TO 43.95 %, ADDING THE 13TH VARIABLE DOES NOT IMPROVE THE REGRESSION MODEL, SO UNDER THIS CRITERION THE 12-VARIABLE MODEL IS PREFERRED (see the sketch below)
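The same comparison can be made programmatically; a brief sketch using the Backward object created above:

adjr2_backward <- summary(Backward)$adjr2

which.max(adjr2_backward)   # subset size with the highest adjusted R-squared (9 for the values above)
adjr2_backward[12:13]       # adjusted R-squared of the 12- and 13-variable subsets (0.4407 vs. 0.4396)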
# ANOTHER WAY TO ANALYZE THIS WITHIN THE “REGSUBSETS” FUNCTION IS TO HAVE THE PROGRAM REPORT THE TWO BEST MODELS OF EACH SIZE (nbest = 2)
> Backward2<-regsubsets(as.matrix(Boston[,-1]), Boston[,1], nbest = 2, nvmax = NULL,
method='backward')
> summary(Backward2)
Subset selection object
13 Variables (and intercept)
Forced in Forced out
zn FALSE FALSE
indus FALSE FALSE
chas FALSE FALSE
nox FALSE FALSE
rm FALSE FALSE
age FALSE FALSE
dis FALSE FALSE
rad FALSE FALSE
tax FALSE FALSE
ptratio FALSE FALSE
black FALSE FALSE
lstat FALSE FALSE
medv FALSE FALSE
2 subsets of each size up to 13
Selection Algorithm: backward
zn indus chas nox rm age dis rad tax ptratio black lstat medv
1 ( 1 ) " " " " " " " " " " " " " " "*" " " " " "" "" ""
1 ( 2 ) " " " " " " " " " " " " "*" " " " " " " "" "" ""
2 ( 1 ) " " " " " " " " " " " " " " "*" " " " " " " " " "*"
2 ( 2 ) " " " " " " " " " " " " "*" "*" " " " " "" "" ""
3 ( 1 ) " " " " " " " " " " " " "*" "*" " " " " " " " " "*"
3 ( 2 ) "*" " " " " " " " " " " "*" "*" " " " " "" "" ""
4 ( 1 ) "*" " " " " " " " " " " "*" "*" " " " " " " " " "*"
4 ( 2 ) "*" " " " " " " " " " " "*" "*" " " " " "*" " " " "
5 ( 1 ) "*" " " " " " " " " " " "*" "*" " " " " "*" " " "*"
5 ( 2 ) "*" " " " " "*" " " " " "*" "*" " " " " "*" " " " "
6 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " " " "*" " " "*"
6 ( 2 ) "*" " " " " "*" " " " " "*" "*" " " "*" "*" " " " "
7 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " "*" "*" " " "*"
7 ( 2 ) "*" " " " " "*" " " " " "*" "*" " " "*" "*" "*" " "
8 ( 1 ) "*" " " " " "*" " " " " "*" "*" " " "*" "*" "*" "*"
8 ( 2 ) "*" "*" " " "*" " " " " "*" "*" " " "*" "*" "*" " "
9 ( 1 ) "*" "*" " " "*" " " " " "*" "*" " " "*" "*" "*" "*"
9 ( 2 ) "*" "*" " " "*" "*" " " "*" "*" " " "*" "*" "*" " "
10 ( 1 ) "*" "*" " " "*" "*" " " "*" "*" " " "*" "*" "*" "*"
10 ( 2 ) "*" "*" " " "*" "*" " " "*" "*" "*" "*" "*" "*" " "
11 ( 1 ) "*" "*" " " "*" "*" " " "*" "*" "*" "*" "*" "*" "*"
11 ( 2 ) "*" "*" "*" "*" "*" " " "*" "*" "*" "*" "*" "*" " "
12 ( 1 ) "*" "*" "*" "*" "*" " " "*" "*" "*" "*" "*" "*" "*"
12 ( 2 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" " "
13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
> summary(Backward2)$adjr2
[1] 0.3900489 0.1424513 0.4151340 0.3954053 0.4187255 0.3974000 0.4289661 0.4087519
0.4336665 0.4080195 0.4373321 0.4072802 0.4398956 0.4310615 0.4416149 0.4311528

“STEP” METHOD
METHODS FOR BUILDING THE REGRESSION EQUATION
# FINALLY WE USE THE LAST METHOD, “STEP”; AS IN THE PREVIOUS METHODS WE LOAD THE LIBRARY AND THEN THE “BOSTON” DATA
# NEXT WE MUST COMPUTE THE CORRELATION BETWEEN ALL THE AVAILABLE VARIABLES
> library(dplyr)
> datos <- as.data.frame(Boston)
> round(cor(x = datos, method = "pearson"), 4)
           crim      zn   indus    chas     nox      rm     age     dis     rad     tax ptratio   black   lstat    medv
crim     1.0000 -0.2005  0.4066 -0.0559  0.4210 -0.2192  0.3527 -0.3797  0.6255  0.5828  0.2899 -0.3851  0.4556 -0.3883
zn      -0.2005  1.0000 -0.5338 -0.0427 -0.5166  0.3120 -0.5695  0.6644 -0.3119 -0.3146 -0.3917  0.1755 -0.4130  0.3604
indus    0.4066 -0.5338  1.0000  0.0629  0.7637 -0.3917  0.6448 -0.7080  0.5951  0.7208  0.3832 -0.3570  0.6038 -0.4837
chas    -0.0559 -0.0427  0.0629  1.0000  0.0912  0.0913  0.0865 -0.0992 -0.0074 -0.0356 -0.1215  0.0488 -0.0539  0.1753
nox      0.4210 -0.5166  0.7637  0.0912  1.0000 -0.3022  0.7315 -0.7692  0.6114  0.6680  0.1889 -0.3801  0.5909 -0.4273
rm      -0.2192  0.3120 -0.3917  0.0913 -0.3022  1.0000 -0.2403  0.2052 -0.2098 -0.2920 -0.3555  0.1281 -0.6138  0.6954
age      0.3527 -0.5695  0.6448  0.0865  0.7315 -0.2403  1.0000 -0.7479  0.4560  0.5065  0.2615 -0.2735  0.6023 -0.3770
dis     -0.3797  0.6644 -0.7080 -0.0992 -0.7692  0.2052 -0.7479  1.0000 -0.4946 -0.5344 -0.2325  0.2915 -0.4970  0.2499
rad      0.6255 -0.3119  0.5951 -0.0074  0.6114 -0.2098  0.4560 -0.4946  1.0000  0.9102  0.4647 -0.4444  0.4887 -0.3816
tax      0.5828 -0.3146  0.7208 -0.0356  0.6680 -0.2920  0.5065 -0.5344  0.9102  1.0000  0.4609 -0.4418  0.5440 -0.4685
ptratio  0.2899 -0.3917  0.3832 -0.1215  0.1889 -0.3555  0.2615 -0.2325  0.4647  0.4609  1.0000 -0.1774  0.3740 -0.5078
black   -0.3851  0.1755 -0.3570  0.0488 -0.3801  0.1281 -0.2735  0.2915 -0.4444 -0.4418 -0.1774  1.0000 -0.3661  0.3335
lstat    0.4556 -0.4130  0.6038 -0.0539  0.5909 -0.6138  0.6023 -0.4970  0.4887  0.5440  0.3740 -0.3661  1.0000 -0.7377
medv    -0.3883  0.3604 -0.4837  0.1753 -0.4273  0.6954 -0.3770  0.2499 -0.3816 -0.4685 -0.5078  0.3335 -0.7377  1.0000
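To read the matrix more easily, the correlations of every variable with medv can be sorted; a small sketch using the datos data frame defined above (medv itself appears with correlation 1):

sort(cor(datos, method = "pearson")[, "medv"])   # from strongest negative to strongest positive correlation with medv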
> chooseCRANmirror()
> utils:::menuInstallPkgs()
# TO BUILD THE SCATTER-PLOT MATRIX AND SHOW THE CORRELATIONS AMONG ALL THE VARIABLES OF THE DATABASE UNDER ANALYSIS
# WE CHOOSE TO INSTALL THE “GGALLY” LIBRARY
> library(ggplot2)
> library(GGally)
> ggpairs(datos, lower = list(continuous = "smooth"),
+ diag = list(continuous = "barDiag"), axisLabels = "none")

[ggpairs output: a 14 x 14 matrix of pairwise plots (scatter plots with smoothers in the lower panels, bar-chart histograms on the diagonal, correlation coefficients in the upper panels); the per-panel progress messages and repeated `stat_bin()` bin-width warnings are not reproduced.]

# BELOW WE SEE THE COMBINATION OF ALL THE VARIABLES SO THAT WE CAN ANALYZE THEM

# WE THEREFORE FIT THE FULL MODEL THAT THE “STEP” PROCEDURE WILL USE TO BUILD THE REGRESSION EQUATION
> modelo <- lm ( medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio +
black + lstat, data=Boston)
> summary(modelo)

Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + age +
dis + rad + tax + ptratio + black + lstat, data = Boston)

Residuals:
Min 1Q Median 3Q Max
-15.595 -2.730 -0.518 1.777 26.199

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.646e+01 5.103e+00 7.144 3.28e-12 ***
crim -1.080e-01 3.286e-02 -3.287 0.001087 **
zn 4.642e-02 1.373e-02 3.382 0.000778 ***
indus 2.056e-02 6.150e-02 0.334 0.738288
chas 2.687e+00 8.616e-01 3.118 0.001925 **
nox -1.777e+01 3.820e+00 -4.651 4.25e-06 ***
rm 3.810e+00 4.179e-01 9.116 < 2e-16 ***
age 6.922e-04 1.321e-02 0.052 0.958229
dis -1.476e+00 1.995e-01 -7.398 6.01e-13 ***
rad 3.060e-01 6.635e-02 4.613 5.07e-06 ***
tax -1.233e-02 3.760e-03 -3.280 0.001112 **
ptratio -9.527e-01 1.308e-01 -7.283 1.31e-12 ***
black 9.312e-03 2.686e-03 3.467 0.000573 ***
lstat -5.248e-01 5.072e-02 -10.347 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4.745 on 492 degrees of freedom


Multiple R-squared: 0.7406, Adjusted R-squared: 0.7338
F-statistic: 108.1 on 13 and 492 DF, p-value: < 2.2e-16

p-value < 2.2e-16, which is less than 0.05


# SINCE THE P-VALUE IS LESS THAN 0.05, WE REJECT THE NULL HYPOTHESIS THAT THERE IS NO REGRESSION RELATIONSHIP, WHICH MEANS THAT THE PROPOSED REGRESSION EQUATION IS INDEED VALID
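The p-value reported above can be recomputed from the F statistic stored in the summary object; a minimal sketch using the modelo object fitted above:

fstat <- summary(modelo)$fstatistic                   # named vector: value, numdf, dendf
pf(fstat[1], fstat[2], fstat[3], lower.tail = FALSE)  # p-value of the overall F test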

Adjusted R-squared = 73.38%


# WE SEE THAT THE ADJUSTED R-SQUARED IS HIGH; IT MEASURES THE PERCENTAGE OF VARIABILITY OF THE DEPENDENT VARIABLE THAT CAN BE EXPLAINED BY ALL THE INDEPENDENT VARIABLES INCLUDED
> step(object = modelo, direction = "both", trace = 1)
Start: AIC=1589.64
medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad +
tax + ptratio + black + lstat
# HERE WE USE THE “AIC” CRITERION, WHICH DECIDES AT EACH STEP WHETHER TO REMOVE OR ADD VARIABLES TO THE REGRESSION EQUATION
Df Sum of Sq RSS AIC
- age 1 0.06 11079 1587.7
- indus 1 2.52 11081 1587.8
<none> 11079 1589.6
- chas 1 218.97 11298 1597.5
- tax 1 242.26 11321 1598.6
- crim 1 243.22 11322 1598.6
- zn 1 257.49 11336 1599.3
- black 1 270.63 11349 1599.8
- rad 1 479.15 11558 1609.1
- nox 1 487.16 11566 1609.4
- ptratio 1 1194.23 12273 1639.4
- dis 1 1232.41 12311 1641.0
- rm 1 1871.32 12950 1666.6
- lstat 1 2410.84 13490 1687.3

Step: AIC=1587.65
medv ~ crim + zn + indus + chas + nox + rm + dis + rad + tax +
ptratio + black + lstat

Df Sum of Sq RSS AIC


- indus 1 2.52 11081 1585.8
<none> 11079 1587.7
+ age 1 0.06 11079 1589.6
- chas 1 219.91 11299 1595.6
- tax 1 242.24 11321 1596.6
- crim 1 243.20 11322 1596.6
- zn 1 260.32 11339 1597.4
- black 1 272.26 11351 1597.9
- rad 1 481.09 11560 1607.2
- nox 1 520.87 11600 1608.9
- ptratio 1 1200.23 12279 1637.7
- dis 1 1352.26 12431 1643.9
- rm 1 1959.55 13038 1668.0
- lstat 1 2718.88 13798 1696.7
Step: AIC=1585.76
medv ~ crim + zn + chas + nox + rm + dis + rad + tax + ptratio +
black + lstat

Df Sum of Sq RSS AIC


<none> 11081 1585.8
+ indus 1 2.52 11079 1587.7
+ age 1 0.06 11081 1587.8
- chas 1 227.21 11309 1594.0
- crim 1 245.37 11327 1594.8
- zn 1 257.82 11339 1595.4
- black 1 270.82 11352 1596.0
- tax 1 273.62 11355 1596.1
- rad 1 500.92 11582 1606.1
- nox 1 541.91 11623 1607.9
- ptratio 1 1206.45 12288 1636.0
- dis 1 1448.94 12530 1645.9
- rm 1 1963.66 13045 1666.3
- lstat 1 2723.48 13805 1695.0
Call:
lm(formula = medv ~ crim + zn + chas + nox + rm + dis + rad +
tax + ptratio + black + lstat, data = Boston)
Coefficients:
(Intercept)         crim           zn         chas          nox           rm
  36.341145    -0.108413     0.045845     2.718716   -17.376023     3.801579
        dis          rad          tax      ptratio        black        lstat
  -1.492711     0.299608    -0.011778    -0.946525     0.009291    -0.522553
# WE CAN SEE THAT 11 VARIABLES ARE KEPT UNDER THE “AIC” CRITERION, SO THE EQUATION IS AS FOLLOWS
medv = 36.341145 - 0.108413*crim + 0.045845*zn + 2.718716*chas - 17.376023*nox + 3.801579*rm - 1.492711*dis + 0.299608*rad - 0.011778*tax - 0.946525*ptratio + 0.009291*black - 0.522553*lstat
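As an illustrative sketch (not part of the original transcript), the model chosen by step() can be stored and used for prediction; the name modelo_aic is assumed here:

modelo_aic <- step(object = modelo, direction = "both", trace = 0)  # trace = 0 suppresses the step log
predict(modelo_aic, newdata = Boston[1:3, ])                        # predicted medv (in $1000s) for the first three rows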
# FINALLY WE LOOK AT THE RESIDUAL PLOTS, WHICH SHOW MORE CLEARLY WHICH MODEL IS BETTER
“AIC” PLOTS

> library(ggplot2)
> library(gridExtra)
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine

> plot1 <- ggplot(data = datos, aes(crim, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot2 <- ggplot(data = datos, aes(zn, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot3 <- ggplot(data = datos, aes(indus, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot4 <- ggplot(data = datos, aes(chas, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot5 <- ggplot(data = datos, aes(nox, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot6 <- ggplot(data = datos, aes(rm, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot7 <- ggplot(data = datos, aes(age, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot8 <- ggplot(data = datos, aes(dis, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot9 <- ggplot(data = datos, aes(rad, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot10 <- ggplot(data = datos, aes(tax, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot11 <- ggplot(data = datos, aes(ptratio, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot12 <- ggplot(data = datos, aes(black, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot13 <- ggplot(data = datos, aes(lstat, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> plot14 <- ggplot(data = datos, aes(medv, modelo$residuals)) + geom_point() + geom_smooth(color = "firebrick") + geom_hline(yintercept = 0) + theme_bw()
> grid.arrange(plot1, plot2, plot3, plot4, plot5, plot6, plot7, plot8, plot9, plot10, plot11, plot13, plot14)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
There were 17 warnings (use warnings() to see them)
# HERE WE CAN EXAMINE EACH INDEPENDENT VARIABLE SEPARATELY, AND WE SEE THAT THE VARIABLES “LSTAT”, “MEDV” AND “DIS” SHOW THE BEST-BEHAVED RESIDUAL PATTERNS
# BUILDING THE Q-Q PLOT
# HERE WE CAN ASSESS THE QUALITY OF THE REGRESSION MODEL FROM ANOTHER ANGLE
> qqnorm(modelo$residuals)
> qqline(modelo$residuals)
# WE CAN SEE THAT THE REGRESSION MODEL FITS ADEQUATELY, SINCE MOST OF THE QUANTILE POINTS LIE CLOSE TO THE LINE
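As a complementary check, assumed here and not shown in the original transcript, residual normality can also be tested numerically alongside the Q-Q plot:

shapiro.test(modelo$residuals)   # Shapiro-Wilk normality test on the residuals of the full model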
# CRITERION FOR COMPARING AND CHOOSING LINEAR REGRESSION MODELS
# adj.r.squared = VALUE OF THE “ADJUSTED R-SQUARED” STATISTIC
# IT MEASURES THE PERCENTAGE OF VARIABILITY OF THE DEPENDENT VARIABLE EXPLAINED BY THE INDEPENDENT VARIABLES
# /// we look for the regression model with the highest value of the ADJUSTED R-SQUARED
> summary.lm(regresion1)$adj.r.squared
[1] 0.1490955
> summary.lm(regresion2)$adj.r.squared
[1] 0.2309
> summary.lm(regresion3)$adj.r.squared
[1] 0.2894927
> summary.lm(regresion4)$adj.r.squared
[1] 0.3241747
> summary.lm(regresion5)$adj.r.squared
[1] 0.3246258
> summary.lm(regresion6)$adj.r.squared
[1] 0.5837199
> summary.lm(regresion7)$adj.r.squared
[1] 0.5837199
> summary.lm(regresion8)$adj.r.squared
[1] 0.6252695
> summary.lm(regresion9)$adj.r.squared
[1] 0.6252695
> summary.lm(regresion10)$adj.r.squared
[1] 0.6323833
> summary.lm(regresion11)$adj.r.squared
[1] 0.6629729
> summary.lm(regresion12)$adj.r.squared
[1] 0.6765176
# CRITERION FOR COMPARING AND CHOOSING LINEAR REGRESSION MODELS
# CRITERION of the RESIDUAL STANDARD ERROR “$sigma”
# under this criterion WE SELECT THE REGRESSION MODEL WITH THE SMALLEST “$sigma” VALUE (see the sketch after the output below)
> summary.lm(regresion1)$sigma
[1] 8.483821
> summary.lm(regresion2)$sigma
[1] 8.065484
> summary.lm(regresion3)$sigma
[1] 7.752386
> summary.lm(regresion4)$sigma
[1] 7.56081
> summary.lm(regresion5)$sigma
[1] 7.558286
> summary.lm(regresion6)$sigma
[1] 5.93395
> summary.lm(regresion7)$sigma
[1] 5.93395
> summary.lm(regresion8)$sigma
[1] 5.630029
> summary.lm(regresion9)$sigma
[1] 5.630029
> summary.lm(regresion10)$sigma
[1] 5.576333
> summary.lm(regresion11)$sigma
[1] 5.33929
> summary.lm(regresion12)$sigma
[1] 5.2309
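Both criteria can be collected in one place; a sketch that assumes the objects regresion1 through regresion12 created above are still in the workspace:

modelos <- list(regresion1, regresion2, regresion3, regresion4, regresion5, regresion6,
                regresion7, regresion8, regresion9, regresion10, regresion11, regresion12)

adj_r2 <- sapply(modelos, function(m) summary(m)$adj.r.squared)   # higher is better
sigma  <- sapply(modelos, function(m) summary(m)$sigma)           # lower is better

which.max(adj_r2)   # model with the highest adjusted R-squared (regresion12 here)
which.min(sigma)    # model with the smallest residual standard error (regresion12 here)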
