Está en la página 1de 18

Year 1986

Name: Dhruv Kumar


Section: Q1303
Reg.no:11305472
Roll.no: B(52)

1) Calculate the mean, median, and standard deviation of the following


variables of the year allotted to you, broken down by the Wins.
Ans)
# Mean of each statistic within every Wins group.
# The cbind() terms carry no names, so the result labels them V1..V5 in this
# order: Earnings, Yards/Drive, Driving Accuracy, Greens in Regulation,
# Putting Average.
aggregate(
  cbind(
    case$Earnings,
    case$`Yards/Drive`,
    case$`Driving Accuracy`,
    case$`Greens in Regulation`,
    case$`Putting Average`
  ) ~ Wins,
  data = case,
  FUN = mean
)

Mean

Wins V1 V2 V3 V4 V5
1 0 955293 269.5600 59.30727 59.14182 1.669812
2 1 2398446 269.4759 58.74483 58.22069 1.655517
3 2 3881868 286.4500 65.25000 64.67500 1.779500
4 3 5819988 298.1000 56.90000 63.20000 1.751000
5 7 10867052 302.4000 59.80000 69.90000 1.733000

Median

# Median of each statistic within every Wins group.
# The cbind() terms carry no names, so the result labels them V1..V5 in this
# order: Earnings, Yards/Drive, Driving Accuracy, Greens in Regulation,
# Putting Average.
aggregate(
  cbind(
    case$Earnings,
    case$`Yards/Drive`,
    case$`Driving Accuracy`,
    case$`Greens in Regulation`,
    case$`Putting Average`
  ) ~ Wins,
  data = case,
  FUN = median
)

Wins V1 V2 V3 V4 V5
1 0 874215.9 287.3 63.30 63.10 1.7870
2 1 2079248.0 289.5 61.20 62.30 1.7700
3 2 4255098.4 285.8 64.15 65.05 1.7775
4 3 5819988.0 298.1 56.90 63.20 1.7510
5 7 10867052.0 302.4 59.80 69.90 1.7330

SD

# Standard deviation of each statistic within every Wins group.
# The cbind() terms carry no names, so the result labels them V1..V5 in this
# order: Earnings, Yards/Drive, Driving Accuracy, Greens in Regulation,
# Putting Average.
# sd() of a single value is NA, so a group with one player yields NA.
aggregate(
  cbind(
    case$Earnings,
    case$`Yards/Drive`,
    case$`Driving Accuracy`,
    case$`Greens in Regulation`,
    case$`Putting Average`
  ) ~ Wins,
  data = case,
  FUN = sd
)
Wins V1 V2 V3 V4 V5
1 0 661510.5 72.756728 16.629956 16.11892 0.44823132
2 1 982649.8 75.056225 17.234985 16.38203 0.45917625
3 2 1118658.0 5.693564 5.594342 1.37447 0.01126943
4 3 NA NA NA NA NA
5 7 NA NA NA NA NA

2) Create box plot of the following variables:

# Penalise scientific notation so large values are more likely to print in
# fixed notation.
options(scipen = 5)
# One row, five columns: the five box plots are drawn side by side.
> par(mfrow=c(1,5))
# NOTE(review): columns are referenced without `case$`; presumably the data
# frame was attached earlier -- confirm.
> boxplot(Earnings, main="earnings",col = "blue")
> boxplot(`Putting Average`, main="putting average",col = "yellow")
> boxplot(`Yards/Drive`, main="drive",col = "red")
> boxplot(`Driving Accuracy`, main="driving accuracy",col = blues9)
> boxplot(`Cuts Made`, main="cuts made",col = "red")

3) Create the following graphs:

a. Scatterplots of Age Versus Events and Earnings


# Age on the x-axis against Events on the y-axis.
plot(
  x = Age,
  y = Events,
  main = "Scatterplot Age versus Events",
  xlab = "Age",
  ylab = "Event",
  pch = 9,
  cex = 0.9,
  col = 2
)

# Age on the x-axis against Earnings on the y-axis.
plot(
  x = Age,
  y = Earnings,
  main = "Scatterplot Age versus Earnings",
  xlab = "Age",
  ylab = "earnings",
  pch = 9,
  cex = 0.9,
  col = 3
)
b. Scatterplots of Earnings Versus Events and Cuts Made
# Earnings on the x-axis against Events on the y-axis.
plot(
  x = Earnings,
  y = Events,
  main = "Scatterplot Earnings versus Events",
  xlab = "Earnings",
  ylab = "Events",
  pch = 9,
  cex = 0.9,
  col = 2
)

# Earnings (x) against Cuts Made (y).
# The title literal must stay on one line: a line break inside the quotes
# becomes part of the plotted title.
plot(
  case$Earnings,
  case$`Cuts Made`,
  main = "Scatterplot Earnings versus Cuts.Made",
  xlab = "Earnings",
  ylab = "Cuts.Made",
  cex = 0.9,
  pch = 9,
  col = 2
)
c. Scatterplots of Earnings Versus Greens in Regulation and Putting
Average
# Earnings (x) against Greens in Regulation (y).
# Title and axis-label literals are kept on one line each: a line break
# inside the quotes becomes part of the rendered text.
plot(
  case$Earnings,
  case$`Greens in Regulation`,
  main = "Scatterplot Earnings versus Greens in Regulation",
  xlab = "Earnings",
  ylab = "Greens in Regulation",
  cex = 0.9,
  pch = 9,
  col = 2
)

# Earnings (x) against Putting Average (y).
plot(
  x = case$Earnings,
  y = case$`Putting Average`,
  main = "Scatterplot Earnings versus Putting Average",
  xlab = "Earnings",
  ylab = "Putting Average",
  pch = 9,
  cex = 0.9,
  col = 2
)
d. Scatterplots with trend line of Driving Length Versus Driving Accuracy.
# Driving length (Yards/Drive, x) against Driving Accuracy (y).
plot(
  `Yards/Drive`,
  `Driving Accuracy`,
  main = "Driving Accuracy",
  pch = 17,
  xlab = "Driving Length",
  ylab = "Driving Accuracy",
  col = "Red"
)
# Trend line required by the task: least-squares fit of the y variable on the
# x variable, drawn over the scatterplot.
abline(lm(`Driving Accuracy` ~ `Yards/Drive`))

e. Scatterplot of Cuts Made Versus Rounds


# Cuts Made on the x-axis against Rounds on the y-axis.
plot(
  x = `Cuts Made`,
  y = Rounds,
  main = "Rounds",
  xlab = "Cuts Made",
  ylab = "Rounds",
  pch = 19,
  col = "Red"
)
4. A) Create a table of Correlation for each of the variables in the data.
# Remove the Player column from the data frame before correlating
# (presumably because it is non-numeric -- confirm).
> case$Player=NULL
# Correlation matrix of all remaining columns; each pairwise correlation uses
# the rows where both variables are non-missing.
> cor(case, use="pairwise.complete.obs")

Rank Age Events Rounds Cuts Made


Rank 1.0000000 0.215819840 -0.191025866 -0.388754085 -0.6634517
Age 0.2158198 1.000000000 -0.157874508 -0.170378304 -0.1825925
Events -0.1910259 -0.157874508 1.000000000 0.949725252 0.6857612
Rounds -0.3887541 -0.170378304 0.949725252 1.000000000 0.8701525
Cuts Made -0.6634517 -0.182592510 0.685761203 0.870152524 1.0000000
Top 10s -0.8376737 -0.203455261 0.048665759 0.218861763 0.4818820
Wins -0.4589665 -0.030972987 -0.078312375 0.005116092 0.1573346
Earnings -0.7997339 -0.151215015 -0.003000882 0.162527897 0.4407068
Yards/Drive -0.3107235 -0.129040549 0.623173420 0.611451515 0.4978396
Driving Accuracy -0.2893124 -0.008965467 0.609364441 0.612652853 0.5105561
Greens in Regulation -0.3401873 -0.096884667 0.641945923 0.647542309 0.5496059
Putting Average -0.2856897 -0.080900648 0.626435696 0.610226563 0.4894462
Sand Save Pct -0.3967895 -0.117806849 0.510968012 0.540526566 0.5076465
Eagles -0.2419185 -0.209512698 0.539725566 0.566507535 0.4844327
Birdies -0.4583443 -0.218303322 0.886051260 0.966434823 0.8851266
Pars -0.3963193 -0.158105300 0.930778898 0.992150440 0.8788488
Bogies -0.2939724 -0.157982924 0.931398561 0.943186959 0.7774400
Top 10s Wins Earnings Yards/Drive
Rank -0.83767370 -0.458966548 -0.799733933 -0.31072350
Age -0.20345526 -0.030972987 -0.151215015 -0.12904055
Events 0.04866576 -0.078312375 -0.003000882 0.62317342
Rounds 0.21886176 0.005116092 0.162527897 0.61145152
Cuts Made 0.48188201 0.157334623 0.440706833 0.49783962
Top 10s 1.00000000 0.480868667 0.846382985 0.22407471
Wins 0.48086867 1.000000000 0.807564309 0.04301955
Earnings 0.84638298 0.807564309 1.000000000 0.19177883
Yards/Drive 0.22407471 0.043019552 0.191778830 1.00000000
Driving Accuracy 0.20997743 0.011745779 0.153266624 0.92390436
Greens in Regulation 0.26317491 0.047668050 0.217833450 0.98237332
Putting Average 0.19346932 0.018541856 0.158554873 0.99245793
Sand Save Pct 0.30973269 0.082813336 0.269802927 0.88405383
Eagles 0.18925626 0.102715011 0.177048121 0.36847881
Birdies 0.31485711 0.059357236 0.250983479 0.58878871
Pars 0.22872756 0.012034296 0.173569951 0.59517319
Bogies 0.09908614 -0.080614408 0.044746168 0.60284200
Driving Accuracy Greens in Regulation Putting Average
Rank -0.289312371 -0.34018734 -0.28568968
Age -0.008965467 -0.09688467 -0.08090065
Events 0.609364441 0.64194592 0.62643570
Rounds 0.612652853 0.64754231 0.61022656
Cuts Made 0.510556140 0.54960592 0.48944616
Top 10s 0.209977429 0.26317491 0.19346932
Wins 0.011745779 0.04766805 0.01854186
Earnings 0.153266624 0.21783345 0.15855487
Yards/Drive 0.923904359 0.98237332 0.99245793
Driving Accuracy 1.000000000 0.94708107 0.95304805
Greens in Regulation 0.947081065 1.00000000 0.98300865
Putting Average 0.953048047 0.98300865 1.00000000
Sand Save Pct 0.871864209 0.87339926 0.89141268
Eagles 0.278614440 0.37991011 0.32699860
Birdies 0.562925997 0.62543658 0.56769331
Pars 0.621433887 0.64076273 0.60152527
Bogies 0.563537625 0.61381639 0.60263071
Sand Save Pct Eagles Birdies Pars Bogies
Rank -0.39678950 -0.2419185 -0.45834425 -0.3963193 -0.29397238
Age -0.11780685 -0.2095127 -0.21830332 -0.1581053 -0.15798292
Events 0.51096801 0.5397256 0.88605126 0.9307789 0.93139856
Rounds 0.54052657 0.5665075 0.96643482 0.9921504 0.94318696
Cuts Made 0.50764648 0.4844327 0.88512664 0.8788488 0.77744001
Top 10s 0.30973269 0.1892563 0.31485711 0.2287276 0.09908614
Wins 0.08281334 0.1027150 0.05935724 0.0120343 -0.08061441
Earnings 0.26980293 0.1770481 0.25098348 0.1735700 0.04474617
Yards/Drive 0.88405383 0.3684788 0.58878871 0.5951732 0.60284200
Driving Accuracy 0.87186421 0.2786144 0.56292600 0.6214339 0.56353763
Greens in Regulation 0.87339926 0.3799101 0.62543658 0.6407627 0.61381639
Putting Average 0.89141268 0.3269986 0.56769331 0.6015253 0.60263071
Sand Save Pct 1.00000000 0.2673816 0.52231045 0.5439741 0.47607932
Eagles 0.26738165 1.0000000 0.57095104 0.5305191 0.55582434
Birdies 0.52231045 0.5709510 1.00000000 0.9489202 0.89009535
Pars 0.54397409 0.5305191 0.94892021 1.0000000 0.91362285
Bogies 0.47607932 0.5558243 0.89009535 0.9136228 1.00000000

B) Run a regression of Earnings versus the three most highly correlated variables
(positive or negative) with Earnings. Interpret the output.

# Fit a linear model of Earnings on Rank, Age and Events, then print the fit.
# NOTE(review): in the correlation table above, Top 10s (0.85), Wins (0.81)
# and Rank (-0.80) have the largest absolute correlation with Earnings, while
# Age (-0.15) and Events (-0.003) are among the smallest -- confirm the
# choice of predictors.
reg <- lm(Earnings ~ Rank + Age + Events, data = case)
reg

Call:
lm(formula = Earnings ~ Rank + Age + Events, data = case)

Coefficients:
(Intercept) Rank Age Events
3882073.8 -17506.7 483.4 -34182.9

# Plot the whole data frame (for a data frame this draws pairwise scatterplots
# of its columns).
> plot(case)
# Residuals of the fitted model `reg`, one value per observation.
residuals(reg)
1 2 3 4 5 6 7
7533458.081 2706580.863 1799523.037 1616932.379 1629037.006 1543259.409 1196054.44
5
8 9 10 11 12 13 14
950570.514 632126.357 506781.889 359623.076 207230.274 272277.489 42776.178
15 16 17 18 19 20 21
169306.083 166912.684 229950.356 139872.296 -310182.301 -298152.700 -258746.287
22 23 24 25 26 27 28
-248253.813 108174.176 2327.156 -124093.198 -98636.030 -250622.707 -243404.427
29 30 31 32 33 34 35
-515656.050 -232294.421 -471489.238 -494304.411 -251206.011 -201226.756 -515934.745
36 37 38 39 40 41 42
-336983.002 -266893.322 -351043.475 -395188.755 -787462.929 -490003.596 -520668.165
43 44 45 46 47 48 49
-415439.672 -292080.847 -488870.237 -571299.759 -560399.075 -141346.874 -382804.619
50 51 52 53 54 55 56
-307358.393 -330186.065 -386448.100 -312769.759 -815901.640 -390149.267 -426453.536
57 58 59 60 61 62 63
-295337.762 -298457.398 -349728.268 -408844.107 -300007.826 -484972.388 -642392.498
64 65 66 67 68 69 70
-730919.064 -250894.469 -371450.143 -523884.193 -615093.843 -292212.366 -402087.484
71 72 73 74 75 76 77
-409209.846 -774396.874 -446931.204 -384437.446 -406075.998 -354794.051 -567799.575
78 79 80 81 82 83 84
-658995.711 -504245.282 -256726.703 -463826.483 -550448.729 -821512.076 -360488.868
85 86 87 88 89 90 91
-268451.775 -185965.131 -240671.086 -329484.929 -363003.329 -329591.748 -245177.411
92 93 94 95 96 97 98
-145466.483 -750345.437 -615796.624 -474137.459 -488859.043 -299310.190 -39382.221
99 100 101 102 103 104 105
-502379.356 -104172.899 -608513.784 -289494.937 -100834.539 -81504.776 -51771.323
106 107 108 109 110 111 112
-171084.235 -257396.095 -71193.511 -10458.750 -240506.461 -194626.141 -46620.886
113 114 115 116 117 118 119
3876.859 -59167.102 -108410.603 -310991.324 -19411.144 -134736.236 42809.197
120 121 122 123 124 125 126
-76770.266 177109.210 21259.342 -16169.988 145277.017 132253.432 -64812.017
127 128 129 130 131 132 133
211120.325 244963.735 35249.811 -53079.102 185797.582 128418.121 -73246.285
134 135 136 137 138 139 140
120108.599 109013.692 52256.997 86537.690 250075.569 142067.864 222232.026
141 142 143 144 145 146 147
-247204.881 -304911.509 155137.629 173888.864 -57725.327 63713.287 140540.659
148 149 150 151 152 153 154
-59648.036 164374.012 144304.622 90566.967 324949.475 -179601.346 340398.496
155 156 157 158 159 160 161
153848.494 503538.314 266488.914 285070.719 152994.027 127297.744 -96114.223
162 163 164 165 166 167 168
-76589.708 271152.615 186657.140 462857.184 147017.967 405006.072 9489.566
169 170 171 172 173 174 175
116469.800 301103.336 280768.726 459110.684 294462.135 304718.352 8950.682
176 177 178 179 180 181 182
-110796.053 -239055.870 147120.231 -149717.896 -160931.514 473851.713 270515.449
183 184 185 186 187 188 189
325919.516 473684.525 317588.097 434741.951 371701.993 457280.315 199242.508
190 191 192 193 194 195 196
105216.821 112889.622 498534.930 36168.758 412208.961 393684.750 642789.899
197 198 199 200
138282.517 56310.467 170549.490 19056.005

# Print the residual quantiles, coefficient table and fit statistics of `reg`.
summary(reg)
Call:
lm(formula = Earnings ~ Rank + Age + Events, data = case)

Residuals:
Min 1Q Median 3Q Max
-821512 -329512 -74918 167511 7533458

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3882073.8 391411.4 9.918 < 2e-16 ***
Rank -17506.7 904.2 -19.361 < 2e-16 ***
Age 483.4 7673.6 0.063 0.949833
Events -34182.9 8993.2 -3.801 0.000192 ***
---
Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1

Residual standard error: 711300 on 196 degrees of freedom


Multiple R-squared: 0.6648, Adjusted R-squared: 0.6596
F-statistic: 129.6 on 3 and 196 DF, p-value: < 2.2e-16
Step1:
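H0 (null hypothesis: all slope coefficients are zero) is rejected, because the F-test p-value is < 2.2e-16
H1 (alternative hypothesis: at least one slope coefficient is non-zero) is accepted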
Step2 :
Earnings = 3882073.8 - 17506.7*Rank + 483.4*Age - 34182.9*Events
Step3 :
Rank and Events are highly significant (p < 0.001); Age is not significant (p = 0.95)
Step4:
R-squared: 0.6648, so the model explains about 66% of the variation in Earnings; the model is accepted
Step5: Multiple regression is accepted
2nd Method

question 1.......................................................
a
# Overall mean, median and standard deviation of Earnings, ignoring NAs.
# Each value is stored and then printed, in the order mean, median, sd.
mean_earnings <- mean(ca$Earnings, na.rm = TRUE)
mean_earnings

median_earnings <- median(ca$Earnings, na.rm = TRUE)
median_earnings

sd_earnings <- sd(ca$Earnings, na.rm = TRUE)
sd_earnings

b
# Overall mean, median and standard deviation of Yards.Drive, ignoring NAs.
# Each value is stored and then printed, in the order mean, median, sd.
mean_Yard.Drive <- mean(ca$Yards.Drive, na.rm = TRUE)
mean_Yard.Drive

median_Yard.Drive <- median(ca$Yards.Drive, na.rm = TRUE)
median_Yard.Drive

sd_Yard.Drive <- sd(ca$Yards.Drive, na.rm = TRUE)
sd_Yard.Drive

# Overall mean, median and standard deviation of Driving.Accuracy, ignoring NAs.
# Each value is stored and then printed, in the order mean, median, sd.
mean_driving_accuracy <- mean(ca$Driving.Accuracy, na.rm = TRUE)
mean_driving_accuracy

median_driving_accuracy <- median(ca$Driving.Accuracy, na.rm = TRUE)
median_driving_accuracy

sd_driving_accuracy <- sd(ca$Driving.Accuracy, na.rm = TRUE)
sd_driving_accuracy

c
# Overall mean, median and standard deviation of Greens.in.Regulation,
# ignoring NAs. Each value is stored and then printed, in the order
# mean, median, sd.
mean_greens.in.regulation <- mean(ca$Greens.in.Regulation, na.rm = TRUE)
mean_greens.in.regulation

median_greens.in.regulation <- median(ca$Greens.in.Regulation, na.rm = TRUE)
median_greens.in.regulation

sd_greens.in.regulation <- sd(ca$Greens.in.Regulation, na.rm = TRUE)
sd_greens.in.regulation

d
# Overall mean, median and standard deviation of Putting.Average, ignoring NAs.
# Each value is stored and then printed, in the order mean, median, sd.
mean_putting.average <- mean(ca$Putting.Average, na.rm = TRUE)
mean_putting.average

median_putting.average <- median(ca$Putting.Average, na.rm = TRUE)
median_putting.average

sd_putting.average <- sd(ca$Putting.Average, na.rm = TRUE)
sd_putting.average

question2

a
# Box plot of Earnings.
boxplot(
  Earnings,
  main = "Earnings",
  col = "pink"
)

b
# Box plot of Putting.Average.
boxplot(
  Putting.Average,
  main = "Putting Average",
  col = "green"
)

c
# Box plot of Driving.Accuracy.
boxplot(
  Driving.Accuracy,
  main = "Driving Accuracy",
  col = "yellow"
)

d
# Box plot of Yards.Drive.
boxplot(
  Yards.Drive,
  main = "Yards Drive",
  col = "red"
)

e
# Box plot of Cuts.Made.
boxplot(
  Cuts.Made,
  main = "Cuts Made",
  col = "orange"
)

question3

a. Scatterplots of Age Versus Events and Earnings

# Age (x) against Events (y).
plot(Age, Events,
  main = "Scatterplot Age versus Events",
  cex = 0.9, pch = 9, col = 2
)
# Trend line: the formula is response ~ predictor, so the plotted y variable
# (Events) goes on the left to match the axes of the scatterplot.
abline(lm(Events ~ Age))

# Age (x) against Earnings (y).
plot(Age, Earnings,
  main = "Scatterplot Age versus Earnings",
  cex = 0.9, pch = 9, col = 3
)
# Trend line: the formula is response ~ predictor, so the plotted y variable
# (Earnings) goes on the left to match the axes of the scatterplot.
abline(lm(Earnings ~ Age, data = ca))

b. Scatterplots of Earnings Versus Events and Cuts Made


# Earnings (x) against Events (y).
plot(Earnings, Events,
  main = "Scatterplot Earnings versus Events",
  cex = 0.9, pch = 9, col = 2
)
# Trend line: the formula is response ~ predictor, so the plotted y variable
# (Events) goes on the left to match the axes of the scatterplot.
abline(lm(Events ~ Earnings))

# Earnings (x) against Cuts.Made (y).
plot(Earnings, Cuts.Made,
  main = "Scatterplot Earnings versus Cuts.Made",
  cex = 0.9, pch = 9, col = 2
)
# Trend line: the formula is response ~ predictor, so the plotted y variable
# (Cuts.Made) goes on the left to match the axes of the scatterplot.
abline(lm(Cuts.Made ~ Earnings))
c. Scatterplots of Earnings Versus Greens in Regulation and Putting Average
# Earnings (x) against Greens.in.Regulation (y).
# The title literal stays on one line: a line break inside the quotes becomes
# part of the plotted title.
plot(Earnings, Greens.in.Regulation,
  main = "Scatterplot Earnings versus Greens in Regulation",
  cex = 0.9, pch = 9, col = 2
)
# Trend line: the formula is response ~ predictor, so the plotted y variable
# (Greens.in.Regulation) goes on the left to match the axes of the scatterplot.
abline(lm(Greens.in.Regulation ~ Earnings))

# Earnings (x) against Putting.Average (y).
plot(Earnings, Putting.Average,
  main = "Scatterplot Earnings versus Putting Average",
  cex = 0.9, pch = 9, col = 2
)
# Trend line: the formula is response ~ predictor, so the plotted y variable
# (Putting.Average) goes on the left to match the axes of the scatterplot.
abline(lm(Putting.Average ~ Earnings))

d. Scatterplots with trend line of Driving Length Versus Driving Accuracy

# Driving length (Yards.Drive, x) against Driving.Accuracy (y), as the task
# for this part asks for.
plot(Yards.Drive, Driving.Accuracy,
  main = "Scatterplot Driving Length versus Driving Accuracy",
  cex = 0.9, pch = 9, col = 2
)
# Trend line: the formula is response ~ predictor, so the plotted y variable
# (Driving.Accuracy) goes on the left to match the axes of the scatterplot.
abline(lm(Driving.Accuracy ~ Yards.Drive))

e. Scatterplot of Cuts Made Versus Rounds


# Cuts.Made (x) against Rounds (y).
plot(Cuts.Made, Rounds,
  main = "Scatterplot Cuts Made versus Rounds",
  cex = 0.9, pch = 9, col = 2
)
# Trend line: the formula is response ~ predictor, so the plotted y variable
# (Rounds) goes on the left to match the axes of the scatterplot.
abline(lm(Rounds ~ Cuts.Made))

4. A) Create a table of Correlation for each of the variables in the data.

# Remove the Player column from the data frame before correlating
# (presumably because it is non-numeric -- confirm).
ca$Player <- NULL
# Correlation matrix of the remaining columns. Missing values are left as NA
# and excluded pair by pair; overwriting them with 0 would treat "unknown" as
# a real observation of zero and bias the correlations.
cor(ca, use = "pairwise.complete.obs")

B) Run a regression of Earnings versus the three most highly correlated variables
(positive or negative) with Earnings. Interpret the output.

# Keep only the response and the three chosen predictors.
# NOTE(review): confirm that Bogies, Pars and Rounds really are the three
# variables most strongly correlated with Earnings in cor(ca).
input <- ca[, c("Earnings", "Bogies", "Pars", "Rounds")]
# Fit the linear model of Earnings on the three predictors and print it.
model <- lm(Earnings ~ Bogies + Pars + Rounds, data = input)
model
# Diagnostic plots for the fitted model.
plot(model)
# Coefficient table and fit statistics (the function is `summary`).
summary(model)

También podría gustarte