# Linear Regression

# FUNCTION: fitLinearSummary
# Fits a simple linear regression of y on x with lm().
# inputs: y = numeric response vector, x = numeric predictor vector
#         (each defaults to 50 random uniform draws)
# outputs: named numeric vector with elements slope, pvalue, and Rsquared
#          (Rsquared holds the adjusted R^2 from summary())
fitLinearSummary <- function(y=runif(50),x=runif(50)) {
  dataFrame <- data.frame(yVar = y, xVar = x)
  regModel <- lm(yVar ~ xVar, data = dataFrame)
  # Summarise once and reuse it: calling summary() for every statistic
  # repeats the work and repeats any warning it raises.
  modelSummary <- summary(regModel)
  # Row 2 of the coefficient table is the xVar term; column 1 is the
  # estimate and column 4 is its p-value.
  myOut <- c(slope = modelSummary$coefficients[2, 1],
             pvalue = modelSummary$coefficients[2, 4],
             Rsquared = modelSummary$adj.r.squared)
  myOut
}
# Using default settings:
fitLinearSummary() # returns the slope, p-value and adjusted R-squared value
##       slope      pvalue    Rsquared 
## -0.05249248  0.65757687 -0.01661970
# Using made-up data where x = y, so the fit is exact and the R-squared value
# is 1 (100% of the variance in y is explained by x)
xLin <- seq_len(20)
yLin <- seq_len(20)
# As expected, the R-squared value is 1; R also raises a warning (not an
# error) because the fit is essentially perfect.
fitLinearSummary(yLin,xLin)
## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable

## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable

## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable
##         slope        pvalue      Rsquared 
##  1.000000e+00 1.278962e-281  1.000000e+00
# Function: plotLinear
# Draws a scatter plot of y against x, overlays the least-squares line, and
# annotates the plot with the adjusted R squared value, slope, and p-value.
# Inputs: y = numeric response vector, x = numeric predictor vector
#         (each defaults to 50 random uniform draws)
# Outputs: scatter plot with line of best fit on the active graphics device;
#          the fitted lm object is returned invisibly
plotLinear <- function(y=runif(50),x=runif(50)) {
  dataFrame <- data.frame(yVar = y, xVar = x)
  regModel <- lm(yVar ~ xVar, data = dataFrame)
  modelSummary <- summary(regModel)
  # One label per statistic; paste() yields a single string each, so the
  # legend shows exactly three entries.
  statLabels <- c(
    paste("R-sq=", format(modelSummary$adj.r.squared, nsmall = 3, digits = 3)),
    paste("slope=", format(modelSummary$coefficients[2, 1], nsmall = 3, digits = 3)),
    paste("p-value=", format(modelSummary$coefficients[2, 4], nsmall = 3, digits = 3))
  )
  plot(y = dataFrame$yVar, x = dataFrame$xVar, pch = 21, bg = "cornflowerblue",
       main = "Linear Regression", ylab = "Y Axis", xlab = "X Axis")
  abline(reg = regModel)
  legend("topleft", legend = statLabels, bty = "n")
  invisible(regModel)
}
# Plotting the default values:
plotLinear()

# Plotting made-up data
XLinPlot <- c(3,5,4,5,2)
YLinPlot <- c(4,6,3,4,1)
plotLinear(y = YLinPlot, x = XLinPlot)

# Contingency Table

# Function: summaryContTable
# Builds a two-row contingency table from the two input vectors and reports
# the results of chisq.test() on it.
# Inputs: discrete numerical vectors for both y (first row) and x (second row)
# Outputs: prints the chisq.test() result; the result object is also
#          returned invisibly

summaryContTable <- function(y=sample(1:100,3),x=sample(1:100,3)) {
  # The name dataMatrix is kept because chisq.test() echoes it in the
  # "data:" line of its printed output.
  dataMatrix <- rbind(y, x)
  rownames(dataMatrix) <- c("Row1","Row2")
  # Derive column names from the table width so inputs are not limited to
  # exactly three counts.
  colnames(dataMatrix) <- paste0("Column", seq_len(ncol(dataMatrix)))
  testResult <- chisq.test(dataMatrix)
  print(testResult)
  invisible(testResult)
}

# Testing defaults
summaryContTable()
##  Pearson's Chi-squared test
## data:  dataMatrix
## X-squared = 40.685, df = 2, p-value = 1.464e-09
# Using made-up data (same as below)
yCont <- c(13,62,14)
xCont <- c(2,72,53)
summaryContTable(y = yCont, x = xCont)
##  Pearson's Chi-squared test
## data:  dataMatrix
## X-squared = 25.622, df = 2, p-value = 2.73e-06
# Function: doContTable
# Makes a two-row contingency table from the data, prints the chisq.test()
# result, and reports the p-value as the title of a mosaic plot.
# Inputs: discrete numerical vectors for both y (first row) and x (second row)
# Outputs: a mosaic plot with p-value and a bar plot on the active graphics
#          device; the chisq.test() result is returned invisibly

doContTable <- function(y=sample(1:100,3),x=sample(1:100,3)) {
  # The name dataMatrix is kept because chisq.test() echoes it in the
  # "data:" line of its printed output.
  dataMatrix <- rbind(y, x)
  rownames(dataMatrix) <- c("Row1","Row2")
  # Derive column names from the table width so inputs are not limited to
  # exactly three counts.
  colnames(dataMatrix) <- paste0("Column", seq_len(ncol(dataMatrix)))
  testResult <- chisq.test(dataMatrix)
  print(testResult)
  # A single string, so it can be used directly as the plot title.
  pvalue <- paste("P-value=", format(testResult$p.value, nsmall = 3, digits = 3))
  mosaicplot(x = dataMatrix, color = c("goldenrod", "grey", "black"),
             shade = FALSE, main = pvalue)
  barplot(height = dataMatrix, beside = TRUE,
          col = c("cornflowerblue", "tomato"))
  invisible(testResult)
}

# Printing default values
doContTable()
##  Pearson's Chi-squared test
## data:  dataMatrix
## X-squared = 6.1911, df = 2, p-value = 0.04525

# Using made-up data
yCont <- c(13,62,14)
xCont <- c(2,72,53)
doContTable(y = yCont, x = xCont)
##  Pearson's Chi-squared test
## data:  dataMatrix
## X-squared = 25.622, df = 2, p-value = 2.73e-06


# Function: doAnova
# Fits a one-way ANOVA of y on x with aov() and reports the F value (test
# statistic for the variation between sample means).
# Inputs: y = continuous numeric vector; x = discrete vector of group labels
#         (data.frame() recycles x when its length divides the length of y)
# Outputs: prints the F value; the same value is returned invisibly

doAnova <- function(y=sample(1:100,9),x=c("Category1","Category2", "Category3")) {
  dataFrame <- data.frame(xVar = x, yVar = y)
  # Make string labels an explicit factor instead of relying on implicit
  # string-to-factor conversion.
  if (is.character(dataFrame$xVar)) {
    dataFrame$xVar <- factor(dataFrame$xVar)
  }
  AnovaModel <- aov(yVar ~ xVar, data = dataFrame)
  # First entry of the "F value" column in the ANOVA table is the xVar term.
  fValue <- summary(AnovaModel)[[1]][["F value"]][1]
  print(fValue)
  invisible(fValue)
}

# Printing default values
doAnova()
## [1] 2.63234
# Using made-up data
y <- c(10,2,6,10,12,3,3,18,7)
x <- c("Low","Medium","High")
doAnova(y, x)
## [1] 0.748062
# Function: GraphAnova
# Fits a one-way ANOVA of y on x and graphs a box plot with whiskers of y
# for each group, with the F value shown in the plot title.
# Input: y = continuous numeric vector; x = discrete vector of group labels
#        (data.frame() recycles x when its length divides the length of y)
# Output: box plot with whiskers and F value on the active graphics device;
#         the F value is returned invisibly

GraphAnova <- function(y=sample(1:100,9),x=c("Category1","Category2", "Category3")) {
  dataFrame <- data.frame(xVar = x, yVar = y)
  # Make string labels an explicit factor instead of relying on implicit
  # string-to-factor conversion.
  if (is.character(dataFrame$xVar)) {
    dataFrame$xVar <- factor(dataFrame$xVar)
  }
  AnovaModel <- aov(yVar ~ xVar, data = dataFrame)
  # First entry of the "F value" column in the ANOVA table is the xVar term.
  fValue <- summary(AnovaModel)[[1]][["F value"]][1]
  boxplot(yVar ~ xVar, data = dataFrame,
          col = c("grey", "thistle", "orchid"),
          main = paste("F value =", format(fValue, nsmall = 3, digits = 3)))
  invisible(fValue)
}
# Graphing default values
GraphAnova()

# Graphing made-up values
y <- c(10,2,6,10,12,3,3,18,7)
x <- c("Low","Medium","High")
GraphAnova(y, x)

# Logistic Regression

# Function: LogReg
# Completes a logistic regression of y on x with glm() (binomial family,
# logit link) and outputs the model summary.
# Inputs: continuous numeric vector for x and a discrete vector for y
#         (data.frame() recycles y when its length divides the length of x)
# Outputs: prints the summary of the logistic regression; the summary object
#          is returned invisibly

LogReg <- function(y=c("Category1","Category2","Category3"),x=sample(1:100,12)) {
  dataFrame <- data.frame(xVar = x, yVar = y)
  # Make a string response an explicit factor. For a factor response the
  # binomial family treats the first level as failure and every other level
  # as success.
  if (is.character(dataFrame$yVar)) {
    dataFrame$yVar <- factor(dataFrame$yVar)
  }
  logRegModel <- glm(yVar ~ xVar,
                     family = binomial(link = "logit"),
                     data = dataFrame)
  modelSummary <- summary(logRegModel)
  print(modelSummary)
  invisible(modelSummary)
}
# Testing default values
LogReg()
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"), 
##     data = dataFrame)
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5723  -1.1384   0.5962   0.9253   1.2805  
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  2.63450    2.01844   1.305    0.192
## xVar        -0.02903    0.02695  -1.077    0.281
## (Dispersion parameter for binomial family taken to be 1)
##     Null deviance: 15.276  on 11  degrees of freedom
## Residual deviance: 13.851  on 10  degrees of freedom
## AIC: 17.851
## Number of Fisher Scoring iterations: 4
# Using made-up data
y <- c("Low","Medium","High")
x <- c(20,32,41,28,25,18,19,28,30,31,37,42)
LogReg(y, x)
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"), 
##     data = dataFrame)
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9939  -1.0095   0.6626   0.8542   1.1601  
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  3.54535    2.81206   1.261    0.207
## xVar        -0.09471    0.08862  -1.069    0.285
## (Dispersion parameter for binomial family taken to be 1)
##     Null deviance: 15.276  on 11  degrees of freedom
## Residual deviance: 13.998  on 10  degrees of freedom
## AIC: 17.998
## Number of Fisher Scoring iterations: 4
# Function: GraphLogReg
# Fits a logistic regression of y on x with glm() (binomial family, logit
# link) and graphs the data with the fitted probability curve.
# inputs: continuous numeric vector for x and a discrete (0/1) vector for y
# outputs: graph of logistic regression on the active graphics device; the
#          fitted glm object is returned invisibly

GraphLogReg <- function(y=rbinom(n=12,size=1,p=0.5),x=sample(1:100,12)) {
  dataFrame <- data.frame(xVar = x, yVar = y)
  logRegModel <- glm(yVar ~ xVar,
                     family = binomial(link = "logit"),
                     data = dataFrame)
  plot(x=dataFrame$xVar, y=dataFrame$yVar,pch=21,bg="violet",cex=2.5)
  # Evaluate the fitted probabilities on an even grid across the observed x
  # range so the curve is smooth regardless of where the data fall.
  xGrid <- seq(min(dataFrame$xVar), max(dataFrame$xVar), length.out = 101)
  fittedProb <- predict(logRegModel,
                        newdata = data.frame(xVar = xGrid),
                        type = "response")
  lines(xGrid, fittedProb, lwd = 2)
  invisible(logRegModel)
}
# Using defaults
GraphLogReg()

# Using made up data
yVar <- c(0,1,0,0,1,1)
xVar <- c(3,6,2,8,4,3)
GraphLogReg(y = yVar, x = xVar)