# Load the midterm scores and take a first look at them.
myData <- read.csv(file = "midtermRegressionS13_F15.csv")
head(myData)     # first few rows
summary(myData)  # per-column summary statistics
# Called on a whole data.frame, cor() returns the matrix of pairwise
# correlations between its columns.
cor(myData)
# Pull out the two score columns used as predictor (x) and response (y).
x <- myData$Midterm.1
y <- myData$Midterm.2

# Deviations of each observation from its own sample mean.
devx <- x - mean(x)
devy <- y - mean(y)

# Pearson correlation computed directly from its definition:
#   r = sum(devx * devy) / (sqrt(sum(devx^2)) * sqrt(sum(devy^2)))
sum(devx * devy) / (sqrt(sum(devx^2)) * sqrt(sum(devy^2)))

# Called on two vectors, cor() returns that same Pearson coefficient.
cor(x, y)
# Fit the least-squares regression line once and reuse it below.
# Order matters in the formula passed to lm(): the response (y) comes first,
# then the predictor (x).
myLinearModel <- lm(y ~ x)

# Scatterplot of the two midterms with the fitted line on top.
# x and y are plotted explicitly rather than calling plot() on the whole
# data.frame: plot(<data.frame>) only yields this scatterplot when the frame
# has exactly these two columns in this order, and otherwise draws a
# scatterplot matrix on which abline() would be meaningless.
plot(x, y, xlab = "Midterm.1", ylab = "Midterm.2")
abline(myLinearModel)

# The slope of the regression line is r * s_y / s_x:
cor(x, y) * sd(y) / sd(x)

# To see the details (intercept and slope) of the least-squares regression line:
myLinearModel

# Residuals in observation order, with a horizontal reference line at zero
# (equivalent to abline(0, 0): intercept 0 and slope 0).
plot(resid(myLinearModel))
abline(h = 0)