Documentos de Académico
Documentos de Profesional
Documentos de Cultura
Mean
Wins V1 V2 V3 V4 V5
1 0 955293 269.5600 59.30727 59.14182 1.669812
2 1 2398446 269.4759 58.74483 58.22069 1.655517
3 2 3881868 286.4500 65.25000 64.67500 1.779500
4 3 5819988 298.1000 56.90000 63.20000 1.751000
5 7 10867052 302.4000 59.80000 69.90000 1.733000
Median
# Median of each performance measure, grouped by number of wins.
# The columns are named directly (resolved through `data = case`) so that
# aggregate() labels the result columns with the variable names instead of
# the anonymous V1..V5 that cbind(case$...) produces.
aggregate(
  cbind(Earnings, `Yards/Drive`, `Driving Accuracy`,
        `Greens in Regulation`, `Putting Average`) ~ Wins,
  data = case, FUN = median
)
Wins V1 V2 V3 V4 V5
1 0 874215.9 287.3 63.30 63.10 1.7870
2 1 2079248.0 289.5 61.20 62.30 1.7700
3 2 4255098.4 285.8 64.15 65.05 1.7775
4 3 5819988.0 298.1 56.90 63.20 1.7510
5 7 10867052.0 302.4 59.80 69.90 1.7330
SD
# Standard deviation of each performance measure, grouped by number of wins.
# The columns are named directly (resolved through `data = case`) so that
# aggregate() labels the result columns with the variable names instead of
# the anonymous V1..V5 that cbind(case$...) produces.
# Win counts that occur for a single player yield NA, because sd() of one
# observation is undefined.
aggregate(
  cbind(Earnings, `Yards/Drive`, `Driving Accuracy`,
        `Greens in Regulation`, `Putting Average`) ~ Wins,
  data = case, FUN = sd
)
Wins V1 V2 V3 V4 V5
1 0 661510.5 72.756728 16.629956 16.11892 0.44823132
2 1 982649.8 75.056225 17.234985 16.38203 0.45917625
3 2 1118658.0 5.693564 5.594342 1.37447 0.01126943
4 3 NA NA NA NA NA
5 7 NA NA NA NA NA
# Bias number printing towards fixed notation rather than scientific.
options(scipen = 5)

# Draw the five boxplots side by side. The pasted console prompts ("> ") were
# removed so the lines parse as R, and the columns are read from `case`
# explicitly instead of relying on attach(case).
# The previous graphics layout is restored afterwards so later plots are not
# squeezed into the 1 x 5 grid.
old_par <- par(mfrow = c(1, 5))
boxplot(case$Earnings, main = "earnings", col = "blue")
boxplot(case$`Putting Average`, main = "putting average", col = "yellow")
boxplot(case$`Yards/Drive`, main = "drive", col = "red")
boxplot(case$`Driving Accuracy`, main = "driving accuracy", col = blues9)
# assumes `case` has a "Cuts Made" column, as the original bare name implied
boxplot(case$`Cuts Made`, main = "cuts made", col = "red")
par(old_par)
# Scatterplot with earnings on the x axis and putting average on the y axis.
plot(
  x = case[["Earnings"]],
  y = case[["Putting Average"]],
  main = "Scatterplot Earnings versus Putting Average",
  xlab = "Earnings",
  ylab = "Putting Average",
  pch = 9,
  col = 2,
  cex = 0.9
)
d. Scatterplots with trend line of Driving Length Versus Driving Accuracy.
# Scatterplot of driving length (x) against driving accuracy (y). The columns
# are read from `case` explicitly instead of relying on attach(case).
plot(case$`Yards/Drive`, case$`Driving Accuracy`,
     main = "Driving Accuracy", pch = 17,
     xlab = "Driving Length", ylab = "Driving Accuracy", col = "Red")
# The heading asks for a trend line, so overlay the least-squares fit of
# accuracy on length.
abline(lm(`Driving Accuracy` ~ `Yards/Drive`, data = case),
       col = "blue", lwd = 2)
B) Run a regression of Earnings versus the three most highly correlated variables
(positive or negative) with Earnings. Interpret the output.
# Fit a linear model of Earnings on Rank, Age and Events, then print the
# fitted call and coefficients.
reg <- lm(
  formula = Earnings ~ Rank + Age + Events,
  data = case
)
reg
Call:
lm(formula = Earnings ~ Rank + Age + Events, data = case)
Coefficients:
(Intercept) Rank Age Events
3882073.8 -17506.7 483.4 -34182.9
# Plot the whole data set (for a data frame this draws a scatterplot matrix
# of its columns). The pasted console prompt ("> ") was removed so the line
# parses as R.
plot(case)
# Print the residual of the fitted model for every observation.
residuals(reg)
1 2 3 4 5 6 7
7533458.081 2706580.863 1799523.037 1616932.379 1629037.006 1543259.409 1196054.445
8 9 10 11 12 13 14
950570.514 632126.357 506781.889 359623.076 207230.274 272277.489 42776.178
15 16 17 18 19 20 21
169306.083 166912.684 229950.356 139872.296 -310182.301 -298152.700 -258746.287
22 23 24 25 26 27 28
-248253.813 108174.176 2327.156 -124093.198 -98636.030 -250622.707 -243404.427
29 30 31 32 33 34 35
-515656.050 -232294.421 -471489.238 -494304.411 -251206.011 -201226.756 -515934.745
36 37 38 39 40 41 42
-336983.002 -266893.322 -351043.475 -395188.755 -787462.929 -490003.596 -520668.165
43 44 45 46 47 48 49
-415439.672 -292080.847 -488870.237 -571299.759 -560399.075 -141346.874 -382804.619
50 51 52 53 54 55 56
-307358.393 -330186.065 -386448.100 -312769.759 -815901.640 -390149.267 -426453.536
57 58 59 60 61 62 63
-295337.762 -298457.398 -349728.268 -408844.107 -300007.826 -484972.388 -642392.498
64 65 66 67 68 69 70
-730919.064 -250894.469 -371450.143 -523884.193 -615093.843 -292212.366 -402087.484
71 72 73 74 75 76 77
-409209.846 -774396.874 -446931.204 -384437.446 -406075.998 -354794.051 -567799.575
78 79 80 81 82 83 84
-658995.711 -504245.282 -256726.703 -463826.483 -550448.729 -821512.076 -360488.868
85 86 87 88 89 90 91
-268451.775 -185965.131 -240671.086 -329484.929 -363003.329 -329591.748 -245177.411
92 93 94 95 96 97 98
-145466.483 -750345.437 -615796.624 -474137.459 -488859.043 -299310.190 -39382.221
99 100 101 102 103 104 105
-502379.356 -104172.899 -608513.784 -289494.937 -100834.539 -81504.776 -51771.323
106 107 108 109 110 111 112
-171084.235 -257396.095 -71193.511 -10458.750 -240506.461 -194626.141 -46620.886
113 114 115 116 117 118 119
3876.859 -59167.102 -108410.603 -310991.324 -19411.144 -134736.236 42809.197
120 121 122 123 124 125 126
-76770.266 177109.210 21259.342 -16169.988 145277.017 132253.432 -64812.017
127 128 129 130 131 132 133
211120.325 244963.735 35249.811 -53079.102 185797.582 128418.121 -73246.285
134 135 136 137 138 139 140
120108.599 109013.692 52256.997 86537.690 250075.569 142067.864 222232.026
141 142 143 144 145 146 147
-247204.881 -304911.509 155137.629 173888.864 -57725.327 63713.287 140540.659
148 149 150 151 152 153 154
-59648.036 164374.012 144304.622 90566.967 324949.475 -179601.346 340398.496
155 156 157 158 159 160 161
153848.494 503538.314 266488.914 285070.719 152994.027 127297.744 -96114.223
162 163 164 165 166 167 168
-76589.708 271152.615 186657.140 462857.184 147017.967 405006.072 9489.566
169 170 171 172 173 174 175
116469.800 301103.336 280768.726 459110.684 294462.135 304718.352 8950.682
176 177 178 179 180 181 182
-110796.053 -239055.870 147120.231 -149717.896 -160931.514 473851.713 270515.449
183 184 185 186 187 188 189
325919.516 473684.525 317588.097 434741.951 371701.993 457280.315 199242.508
190 191 192 193 194 195 196
105216.821 112889.622 498534.930 36168.758 412208.961 393684.750 642789.899
197 198 199 200
138282.517 56310.467 170549.490 19056.005
# Print the model summary: residual quantiles and the coefficient table with
# estimates, standard errors, t values and p-values.
summary(reg)
Call:
lm(formula = Earnings ~ Rank + Age + Events, data = case)
Residuals:
Min 1Q Median 3Q Max
-821512 -329512 -74918 167511 7533458
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3882073.8 391411.4 9.918 < 2e-16 ***
Rank -17506.7 904.2 -19.361 < 2e-16 ***
Age 483.4 7673.6 0.063 0.949833
Events -34182.9 8993.2 -3.801 0.000192 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
question 1.......................................................
a
# Earnings: mean, median and standard deviation, skipping missing values.
# Each statistic is stored and then printed, in that order.
mean_earnings <- mean(ca$Earnings, na.rm = TRUE)
mean_earnings
median_earnings <- median(ca$Earnings, na.rm = TRUE)
median_earnings
sd_earnings <- sd(ca$Earnings, na.rm = TRUE)
sd_earnings
b
# Yards per drive: mean, median and standard deviation, skipping missing
# values. Each statistic is stored and then printed, in that order.
mean_Yard.Drive <- mean(ca$Yards.Drive, na.rm = TRUE)
mean_Yard.Drive
median_Yard.Drive <- median(ca$Yards.Drive, na.rm = TRUE)
median_Yard.Drive
sd_Yard.Drive <- sd(ca$Yards.Drive, na.rm = TRUE)
sd_Yard.Drive
c
# Greens in regulation: mean, median and standard deviation, skipping missing
# values. Each statistic is stored and then printed, in that order.
mean_greens.in.regulation <- mean(ca$Greens.in.Regulation, na.rm = TRUE)
mean_greens.in.regulation
median_greens.in.regulation <- median(ca$Greens.in.Regulation, na.rm = TRUE)
median_greens.in.regulation
sd_greens.in.regulation <- sd(ca$Greens.in.Regulation, na.rm = TRUE)
sd_greens.in.regulation
d
# Putting average: mean, median and standard deviation, skipping missing
# values. Each statistic is stored and then printed, in that order.
mean_putting.average <- mean(ca$Putting.Average, na.rm = TRUE)
mean_putting.average
median_putting.average <- median(ca$Putting.Average, na.rm = TRUE)
median_putting.average
sd_putting.average <- sd(ca$Putting.Average, na.rm = TRUE)
sd_putting.average
question2
a
# Boxplot of earnings. The column is read from `ca` explicitly, like the
# summary statistics in question 1, rather than relying on attach(ca).
boxplot(ca$Earnings, col = "pink", main = "Earnings")
b
# Boxplot of putting average. The column is read from `ca` explicitly, like
# the summary statistics in question 1, rather than relying on attach(ca).
boxplot(ca$Putting.Average, col = "green", main = "Putting Average")
c
# Boxplot of driving accuracy. The column is read from `ca` explicitly rather
# than relying on attach(ca).
# assumes `ca` has a Driving.Accuracy column, as the original bare name implied
boxplot(ca$Driving.Accuracy, col = "yellow", main = "Driving Accuracy")
d
# Boxplot of yards per drive. The column is read from `ca` explicitly, like
# the summary statistics in question 1, rather than relying on attach(ca).
boxplot(ca$Yards.Drive, col = "red", main = "Yards Drive")
e
# Boxplot of cuts made. The column is read from `ca` explicitly rather than
# relying on attach(ca).
# assumes `ca` has a Cuts.Made column, as the original bare name implied
boxplot(ca$Cuts.Made, col = "orange", main = "Cuts Made")
question3
# Remove the Player column before correlating the remaining columns.
ca[["Player"]] <- NULL
# NOTE(review): missing values are overwritten with 0 here, so they enter the
# correlations (and any later use of `ca`) as real zeros. Confirm this is
# intended rather than e.g. cor(ca, use = "complete.obs").
ca <- replace(ca, is.na(ca), 0)
# Print the pairwise correlation matrix of the remaining columns.
cor(ca)
B) Run a regression of Earnings versus the three most highly correlated variables
(positive or negative) with Earnings. Interpret the output.
# Keep only the response and the three predictors used in the regression.
input <- ca[, c("Earnings", "Bogies", "Pars", "Rounds")]
# Fit a linear model of Earnings on Bogies, Pars and Rounds, then print the
# fitted call and coefficients.
model <- lm(Earnings ~ Bogies + Pars + Rounds, data = input)
model
# Draw the diagnostic plots for the fitted model.
plot(model)
# Print the full model summary. The original called `summery(model)`, which is
# not an R function and stopped the script with "could not find function".
summary(model)