In [1]:
# Load the midterm score data; read.csv() treats the first row as column names by default.
myData <- read.csv(file = "midtermRegressionS13_F15.csv")
In [49]:
# Preview the first six rows to confirm the file loaded as expected.
head(myData, n = 6)
Out[49]:
  Midterm.1 Midterm.2
1        52        54
2        70        73
3        86        89
4        73        62
5        92        85
6        47        72
In [12]:
# Per-column summary: minimum, quartiles, median, mean and maximum.
summary(myData)
# Given a whole data.frame, cor() returns the matrix of pairwise correlations
# between its columns (Pearson is the default method, spelled out here).
cor(myData, method = "pearson")
Out[12]:
   Midterm.1       Midterm.2     
 Min.   :33.00   Min.   : 19.00  
 1st Qu.:67.00   1st Qu.: 65.75  
 Median :79.00   Median : 75.00  
 Mean   :76.78   Mean   : 74.17  
 3rd Qu.:88.00   3rd Qu.: 84.00  
 Max.   :99.00   Max.   :100.00  
Out[12]:
          Midterm.1 Midterm.2
Midterm.1 1.0000000 0.6094617
Midterm.2 0.6094617 1.0000000
In [32]:
# Pull the two score columns out of the data.frame as separate vectors.
x <- myData[["Midterm.1"]]
y <- myData[["Midterm.2"]]

# Deviation of every score from the mean of its own column.
devx <- x - mean(x)
devy <- y - mean(y)

# Pearson's r straight from the definition: the sum of the products of the
# deviations, divided by the product of the root sums of squared deviations.
sum(devx * devy) /
  (sqrt(sum(devx^2)) * sqrt(sum(devy^2)))

# Cross-check with the built-in: given two vectors, cor() returns the single
# Pearson correlation coefficient rather than a matrix.
cor(x, y)
Out[32]:
0.609461666031633
Out[32]:
0.609461666031632
In [53]:
# Scatterplot of the two midterm scores with the least-squares line overlaid.
# For a two-column data.frame, plot() puts the first column (Midterm.1) on the
# horizontal axis and the second (Midterm.2) on the vertical axis.
plot(myData, main = "Midterm 2 vs. Midterm 1 scores")
# Order matters in the lm() formula: the response variable goes on the left of `~`
# and the predictor on the right.
# The model is fitted from myData itself rather than from free-standing x and y
# vectors, so the line is always computed from exactly the points drawn above and
# this cell does not depend on another cell having defined x and y.
abline(lm(Midterm.2 ~ Midterm.1, data = myData))
In [54]:
# Slope of the least-squares regression line: b = r * s_y / s_x, where r is the
# correlation of x and y and s_x, s_y are their sample standard deviations.
cor(x, y) * sd(y) / sd(x)
Out[54]:
0.57972495435538
In [55]:
# Fit the least-squares regression of y on x and keep the fitted model object.
myLinearModel <- lm(formula = y ~ x)
# Evaluating the model on its own line displays the call and the estimated
# intercept and slope.
myLinearModel
Out[55]:
Call:
lm(formula = y ~ x)

Coefficients:
(Intercept)            x  
    29.6567       0.5797  
In [56]:
# Residuals of the fitted line, plotted in the order the observations appear.
# Explicit labels and a title replace the defaults, which would otherwise show
# the raw R expression "resid(myLinearModel)" on the vertical axis.
plot(resid(myLinearModel),
     xlab = "Observation index",
     ylab = "Residual",
     main = "Residuals from the least-squares fit")
# Horizontal reference line at zero. abline(h = 0) draws the same line as
# abline(0, 0) (y-intercept 0, slope 0) but states the intent directly.
abline(h = 0)
In [ ]:
In [ ]: