Documentos de Académico
Documentos de Profesional
Documentos de Cultura
regresión
Contents
4. Validación del modelo de regresión 2
4.1. Durbin-Watson . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2
4.2. Test RESET . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
Referencias 14
1
4. Validación del modelo de regresión
4.1. Durbin-Watson
# Attach the wooldridge package; the model below reads `ceosal1`
# (expected to be provided by that package).
library(wooldridge)

# OLS fit of lsalary on lsales, roe, ros and finance, followed by its summary.
ceomodelo <- lm(
  formula = lsalary ~ lsales + roe + ros + finance,
  data = ceosal1
)
summary(ceomodelo)
##
## Call:
## lm(formula = lsalary ~ lsales + roe + ros + finance, data = ceosal1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9569 -0.2522 -0.0325 0.1996 2.8391
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.2391296 0.3138640 13.506 < 2e-16 ***
## lsales 0.2805778 0.0349635 8.025 7.88e-14 ***
## roe 0.0188739 0.0041010 4.602 7.34e-06 ***
## ros 0.0003150 0.0005373 0.586 0.5583
## finance 0.1855094 0.0813177 2.281 0.0236 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4783 on 204 degrees of freedom
## Multiple R-squared: 0.3005, Adjusted R-squared: 0.2868
## F-statistic: 21.91 on 4 and 204 DF, p-value: 4.644e-15
# Lag-one regression of the OLS residuals: e_t (positions 2..n) on
# e_{t-1} (positions 1..n-1). The fitted object is printed as well as stored.
residuo <- ceomodelo[["residuals"]]
n <- length(residuo)
(modedlDW <- lm(formula = residuo[2:n] ~ residuo[1:(n - 1)]))
##
## Call:
## lm(formula = residuo[2:n] ~ residuo[1:(n - 1)])
##
## Coefficients:
## (Intercept) residuo[1:(n - 1)]
## 0.002128 -0.051076
# Approximate Durbin-Watson statistic from the lag-one slope: d = 2 * (1 - rho).
(d <- 2 * (1 - coef(modedlDW)[2]))
## residuo[1:(n - 1)]
## 2.102151
library(lmtest)
2
# Durbin-Watson test on the same specification used for ceomodelo.
dwtest(formula = lsalary ~ lsales + roe + ros + finance, data = ceosal1)
##
## Durbin-Watson test
##
## data: lsalary ~ lsales + roe + ros + finance
## DW = 2.0977, p-value = 0.7312
## alternative hypothesis: true autocorrelation is greater than 0
##
## RESET test
##
## data: lsalary ~ lsales + roe + ros + finance
## RESET = 3.895, df1 = 2, df2 = 202, p-value = 0.02189
3
5. Validación de supuestos en el modelo de regresión
5.1. Normalidad
# Put wage1 on the search path; later snippets refer to its columns
# (wage, educ, exper) by bare name.
# NOTE(review): attach() can silently mask other objects; passing
# `data = wage1` to each call would be safer -- confirm before changing.
attach(wage1)
##
## Call:
## lm(formula = wage ~ educ + exper, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.5532 -1.9801 -0.7071 1.2030 15.8370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.39054 0.76657 -4.423 1.18e-05 ***
## educ 0.64427 0.05381 11.974 < 2e-16 ***
## exper 0.07010 0.01098 6.385 3.78e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.257 on 523 degrees of freedom
## Multiple R-squared: 0.2252, Adjusted R-squared: 0.2222
## F-statistic: 75.99 on 2 and 523 DF, p-value: < 2.2e-16
# Normal Q-Q plot of the residuals with a red reference line.
qqnorm(y = myresid)
qqline(y = myresid, col = "red")
4
Normal Q−Q Plot
15
Sample Quantiles
10
5
0
−5
−3 −2 −1 0 1 2 3
Theoretical Quantiles
library("car")
5
15186
15
10
myresid
5
0
−5
−3 −2 −1 0 1 2 3
norm quantiles
## [1] 186 15
# Density-scaled histogram of the residuals, overlaid with the normal density
# that has the same sample mean and standard deviation.
hist(myresid, breaks = 50, col = "red", prob = TRUE)
m <- mean(myresid)
std <- sd(myresid)
curve(dnorm(x, mean = m, sd = std), lwd = 2, add = TRUE, yaxt = "n")
6
Histogram of myresid
0.20
0.15
Density
0.10
0.05
0.00
−5 0 5 10 15
myresid
# Jarque-Bera statistic computed by hand from sample skewness and kurtosis.
library(moments)
(sk <- skewness(myresid))
## [1] 1.715497
(kurt <- kurtosis(myresid))
## [1] 7.626735
(n <- length(myresid))
## [1] 526
# JB = n * S^2 / 6 + n * (K - 3)^2 / 24
(test.chi.stat <- sk^2 * n / 6 + (kurt - 3)^2 * n / 24)
## [1] 727.1599
# NOTE(review): 1 - pchisq() prints exactly 0 here; pchisq(..., lower.tail =
# FALSE) would return the small right-tail probability instead.
(p.valor <- 1 - pchisq(test.chi.stat, df = 2))
## [1] 0
# Jarque-Bera test from the tseries package.
# install.packages("tseries")
options(scipen = 0)
library(tseries)
jarque.bera.test(x = myresid)
7
##
## Jarque Bera Test
##
## data: myresid
## X-squared = 727.16, df = 2, p-value < 2.2e-16
# Non-parametric check: Shapiro-Wilk normality test on the residuals.
shapiro.test(x = myresid)
##
## Shapiro-Wilk normality test
##
## data: myresid
## W = 0.87661, p-value < 2.2e-16
Una parada técnica. Si queremos evitar las salidas en notación científica:
# A large scipen penalty makes R print fixed rather than scientific notation;
# the Shapiro-Wilk test is repeated to show the effect on the p-value.
options(scipen = 999)
shapiro.test(x = myresid)
##
## Shapiro-Wilk normality test
##
## data: myresid
## W = 0.87661, p-value < 0.00000000000000022
5.2. Multicolinealidad
La matriz de correlación es uno de los tests preliminares para identificar posible multicolinealidad
en la serie de datos. Los factores de inflación de la varianza, VIF (Wooldridge, 2009, p. 99), se suelen
usar como regla para detectarla: valores superiores a 10 se consideran evidencia de
multicolinealidad.
# Correlation matrix of the two regressors: printed, and kept for the VIF below.
(mcorr <- cor(cbind(educ, exper)))
## educ exper
## educ 1.0000000 -0.2995418
## exper -0.2995418 1.0000000
# library(car)
# vif(result1)
# VIF computed by hand as 1 / (1 - r^2), with r the pairwise correlation.
(vif <- 1 / (1 - mcorr[1, 2]^2))
## [1] 1.098569
# This can be checked with summary(lm(educ ~ exper)): mcorr[1, 2]^2 is the
# R^2 of that auxiliary regression.
8
5.3. Heterocedasticidad
5.3.1. Test gráfico
10
5
0
−5 0 5 10 15
res
# Two panels side by side: residuals against wage and against education.
par(mfrow = c(1, 2))
plot(x = wage, y = res, xlab = "ingreso", ylab = "residuales")
plot(x = educ, y = res, xlab = "Educación", ylab = "residuales")
9
15
15
10
10
residuales
residuales
5
5
0
0
−5
0 5 10 15 20 25 −5 0 5 10 15
ingreso Educación
# Diagnostic plots of result1 arranged in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(x = result1)
10
Standardized residuals
Residuals vs Fitted Normal Q−Q
15
5
186
15
229 186
15
229
Residuals
2
5
−1
−5
−2 0 2 4 6 8 10 −3 −2 −1 0 1 2 3
Standardized residuals
Scale−Location Residuals vs Leverage
186
15
229 0.5
112
59
1.5
2
379
Cook's distance
−2
0.0
# Breusch-Pagan test for heteroskedasticity on the fitted model result1.
library(zoo)
library(lmtest)
bptest(formula = result1)
##
## studentized Breusch-Pagan test
##
## data: result1
## BP = 31.847, df = 2, p-value = 0.0000001215
# Auxiliary regression for the Breusch-Pagan test: ressq on the regressors.
# ressq is defined outside this excerpt (presumably the squared residuals of
# result1 -- confirm).
modres.lm <- lm(formula = ressq ~ educ + exper)
N <- length(ressq)
11
# LM form of the Breusch-Pagan statistic: N * R^2 of the auxiliary regression,
# compared with a chi-square whose degrees of freedom equal the number of
# auxiliary regressors.
# lower.tail = FALSE returns the right-tail probability directly.
# http://courses.atlas.illinois.edu/spring2016/STAT/STAT200/pchisq.html
# Rsqres and S were used without a visible definition; derive both from the
# auxiliary fit so this snippet runs on its own.
Rsqres <- summary(modres.lm)$r.squared
S <- length(coef(modres.lm)) - 1L  # slopes only; the intercept is excluded
chisq <- N * Rsqres
pval <- pchisq(chisq, df = S, lower.tail = FALSE)
c(chisq, pval)
5.3.3. Alternativas
5.3.3.a Matriz de varianza-covarianza de los coeficientes
Errores robustos a la heterocedasticidad. Recordemos que el comando vcov permite obtener la matriz de
varianza-covarianza de los coeficientes.
# Variance-covariance matrix of the estimated coefficients of result1.
vcov(object = result1)
##
## Call:
## lm(formula = wage ~ educ + exper, weights = 1/(1 + educ))
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -1.3238 -0.5601 -0.2320 0.3445 4.3839
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.40007 0.56303 -0.711 0.478
## educ 0.42427 0.03851 11.017 < 0.0000000000000002 ***
## exper 0.05678 0.01028 5.525 0.0000000519 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.88 on 523 degrees of freedom
## Multiple R-squared: 0.1899, Adjusted R-squared: 0.1868
## F-statistic: 61.29 on 2 and 523 DF, p-value: < 0.00000000000000022
12
5.3.3.c Mínimos cuadrados robustos
# Robust regression example, left disabled (every line is commented out).
#install.packages("robust")
#install.packages("fit.models")
#install.packages("lib")
#library(robust)
# NOTE(review): the `robust` package exports lmRob(); lmrob() belongs to
# `robustbase`. As written the call would not be found after library(robust)
# -- confirm which package is intended.
#result1Robust<- lmrob(wage ~ educ + exper)
5.4. Autocorrelación
#dwtest(result1)
13
Referencias
14