# Linear Regression ----

######################################################################
# FUNCTION: fitLinearSummary
# fits a regression line
# inputs: continuous numeric vector for both x and y variables
# outputs: slope, p-value, and R^2 value
fitLinearSummary <- function(y = runif(50), x = runif(50)) {
  # Fit a simple linear regression of y on x and summarise the fit.
  #
  # Args:
  #   y: continuous numeric response vector
  #   x: continuous numeric predictor vector (same length as y)
  # Returns:
  #   Named numeric vector with the slope estimate, the slope's p-value,
  #   and the adjusted R^2 of the model.
  dataFrame <- data.frame(yVar = y, xVar = x)
  regModel <- lm(yVar ~ xVar, data = dataFrame)

  # Compute the summary once (the original called summary() three times,
  # repeating the work and tripling any "essentially perfect fit" warning)
  modelSummary <- summary(regModel)

  c(slope    = modelSummary$coefficients[2, 1],
    pvalue   = modelSummary$coefficients[2, 4],
    Rsquared = modelSummary$adj.r.squared)
}
#####################################################################
# Using default settings:
fitLinearSummary() # returns the slope, p-value and adjusted R squared value
##       slope      pvalue    Rsquared 
## -0.05249248  0.65757687 -0.01661970
# Using made-up data where x = y, which yields an R squared value of 1
# (100% of the variance is explained by these variables)
xLin <- 1:20  # was seq(1:20): seq() applied to a vector is the seq_along trap
yLin <- 1:20
fitLinearSummary(yLin,xLin) # R squared is 1; R warns about the essentially perfect fit
## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable

## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable

## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable
##         slope        pvalue      Rsquared 
##  1.000000e+00 1.278962e-281  1.000000e+00
###########################################################################
# Function: plotLinear
# Plots a scatter plot of data with the  R squared value, slope, and pvalue
# Inputs: Continuous numerical vector for both variables
# Outputs: scatter plot with line of best fit
plotLinear <- function(y = runif(50), x = runif(50)) {
  # Scatter plot of y vs x with the fitted regression line, annotated with
  # the adjusted R^2 value.
  #
  # Args:
  #   y: continuous numeric response vector
  #   x: continuous numeric predictor vector (same length as y)
  # Returns:
  #   Invisibly, the fitted lm object (called for its plotting side effect).
  dataFrame <- data.frame(yVar = y, xVar = x)
  regModel <- lm(yVar ~ xVar, data = dataFrame)

  # Legend label showing the adjusted R^2 to 3 significant digits
  rsqLabel <- c("R-sq=",
                format(summary(regModel)$adj.r.squared, nsmall = 3, digits = 3))

  plot(x = dataFrame$xVar, y = dataFrame$yVar,
       pch = 21, bg = "cornflowerblue",
       main = "Linear Regression", ylab = "Y Axis", xlab = "X Axis")
  legend("topright", bty = "n", legend = rsqLabel)
  abline(regModel, col = "blue")

  invisible(regModel)
}
####################################################################
# Plotting the default values (50 random uniform points for each axis):
plotLinear()

# Plotting made-up data (note the argument order: y first, then x)
XLinPlot <- c(3,5,4,5,2)
YLinPlot <- c(4,6,3,4,1)
plotLinear(YLinPlot,XLinPlot)

# Contingency Table

###################################################
# Function: summaryContTable
# Reports the results from the Chi Squared analysis
# Inputs: discrete numerical vectors for both
# Outputs: the results from the Chi Square analysis

summaryContTable <- function(y = sample(1:100, 3), x = sample(1:100, 3)) {
  # Run Pearson's chi-squared test on a 2-row contingency table built from
  # two discrete count vectors (one vector per row).
  #
  # Args:
  #   y: discrete numeric vector (first row of the table)
  #   x: discrete numeric vector (second row; must match length of y)
  # Returns:
  #   Invisibly, the "htest" object from chisq.test (also printed).
  if (length(y) != length(x)) {
    stop("y and x must have the same length", call. = FALSE)
  }

  dataMatrix <- rbind(y, x)
  rownames(dataMatrix) <- c("Row1", "Row2")
  # Generalized: one label per column instead of assuming exactly 3 columns
  colnames(dataMatrix) <- paste0("Column", seq_len(ncol(dataMatrix)))

  # Run the test once and return it so callers can inspect the result
  testResult <- chisq.test(dataMatrix)
  print(testResult)
  invisible(testResult)
}
#############################################
# Testing defaults (inputs are random samples, so printed values vary per run)
summaryContTable()
## 
##  Pearson's Chi-squared test
## 
## data:  dataMatrix
## X-squared = 40.685, df = 2, p-value = 1.464e-09
# Using made-up count data (the same vectors are reused for doContTable below)
yCont <- c(13,62,14)
xCont <- c(2,72,53)
summaryContTable(yCont,xCont)
## 
##  Pearson's Chi-squared test
## 
## data:  dataMatrix
## X-squared = 25.622, df = 2, p-value = 2.73e-06
##############################################################################################
# Function: doContTable
# Makes a contingency table from the data and reports p - value on mosaic plot
# Inputs: discrete numerical vectors for both 
# Outputs: a mosaic plot with p-value and a bar plot

doContTable <- function(y = sample(1:100, 3), x = sample(1:100, 3)) {
  # Build a 2-row contingency table from two count vectors, print Pearson's
  # chi-squared test, and draw a mosaic plot (annotated with the p-value)
  # above a grouped bar plot on one page.
  #
  # Args:
  #   y: discrete numeric vector (first row of the table)
  #   x: discrete numeric vector (second row; must match length of y)
  # Returns:
  #   Invisibly, the "htest" object from chisq.test (also printed).
  if (length(y) != length(x)) {
    stop("y and x must have the same length", call. = FALSE)
  }

  dataMatrix <- rbind(y, x)
  rownames(dataMatrix) <- c("Row1", "Row2")
  # Generalized: one label per column instead of assuming exactly 3 columns
  colnames(dataMatrix) <- paste0("Column", seq_len(ncol(dataMatrix)))

  # Run the test ONCE and reuse it for the printout and the legend label
  # (the original ran chisq.test twice)
  testResult <- chisq.test(dataMatrix)
  print(testResult)
  pvalueLabel <- c("P-value=",
                   format(testResult$p.value, nsmall = 3, digits = 3))

  # Set the 2-panel layout BEFORE plotting so both plots share one page
  # (the original set it after the mosaic plot), and restore the caller's
  # graphics state on exit instead of leaking the par() change
  oldPar <- par(mfrow = c(2, 1))
  on.exit(par(oldPar), add = TRUE)

  mosaicplot(x = dataMatrix,
             col = c("goldenrod", "grey", "mediumorchid1"),
             shade = FALSE)
  legend("bottomleft", bty = "n", legend = pvalueLabel)

  barplot(height = dataMatrix,
          beside = TRUE,
          col = c("indianred2", "lightslateblue"))

  invisible(testResult)
}
#################################################
# Printing default values (random inputs, so the plots and test vary per run)
doContTable()
## 
##  Pearson's Chi-squared test
## 
## data:  dataMatrix
## X-squared = 6.1911, df = 2, p-value = 0.04525

# Using made-up count data (same vectors as the summaryContTable example)
yCont <- c(13,62,14)
xCont <- c(2,72,53)
doContTable(yCont,xCont)
## 
##  Pearson's Chi-squared test
## 
## data:  dataMatrix
## X-squared = 25.622, df = 2, p-value = 2.73e-06

# ANOVA ----

####################################################################################
# Function: doAnova
# Reports the F value (the test statistic for variation between sample means)
# Inputs: continuous numeric vector for y and a discrete grouping vector for x
# Outputs: the F value from the one-way ANOVA, printed to the console

doAnova <- function(y = sample(1:100, 9),
                    x = c("Category1", "Category2", "Category3")) {
  # One-way ANOVA of y grouped by x; prints the F statistic.
  # x is recycled against y by data.frame() (e.g. 3 labels over 9 values).
  #
  # Args:
  #   y: continuous numeric response vector
  #   x: discrete grouping vector (length must recycle evenly into y's)
  # Returns:
  #   Invisibly, the F value (also printed).
  dataFrame <- data.frame(xVar = x, yVar = y)
  anovaFit <- aov(yVar ~ xVar, data = dataFrame)

  # First element of the "F value" column is the between-groups F statistic
  fValue <- summary(anovaFit)[[1]][["F value"]][1]
  print(fValue)
  invisible(fValue)
}
#######################################################################
# Printing default values (y is a random sample, so the F value varies per run)
doAnova()
## [1] 2.63234
# Using made-up data: x has 3 group labels, recycled across the 9 y values
y <- c(10,2,6,10,12,3,3,18,7)
x <- c("Low","Medium","High")
doAnova(y=y,x=x)
## [1] 0.748062
########################################################################
# Function: GraphAnova
# Graphs a box plot with whiskers for each group
# Input: continuous numeric vector for y and a discrete grouping vector for x
# Output: box plot with whiskers, one box per group of x

GraphAnova <- function(y = sample(1:100, 9),
                       x = c("Category1", "Category2", "Category3")) {
  # Box-and-whisker plot of y grouped by x (x is recycled against y by
  # data.frame(), e.g. 3 group labels over 9 observations).
  #
  # Args:
  #   y: continuous numeric response vector
  #   x: discrete grouping vector (length must recycle evenly into y's)
  # Returns:
  #   The list of plot statistics produced by boxplot().
  dataFrame <- data.frame(xVar = x, yVar = y)

  # The original also fitted an aov() model here but never used it; the
  # unused computation has been removed.
  boxplot(yVar ~ xVar, data = dataFrame,
          col = c("purple", "limegreen", "royalblue"))
}
# Graphing default values (random inputs, so the box plot varies per run)
GraphAnova()

# Graphing made-up values: x has 3 group labels, recycled across the 9 y values
y <- c(10,2,6,10,12,3,3,18,7)
x <- c("Low","Medium","High")
GraphAnova(y=y,x=x)

# Logistic Regression ----

###################################################################
# Function: LogReg
# Completes a logistic regression and outputs summary of logistic regression
# Inputs: continuous numeric vector for x and a discrete vector for y
# Outputs: summary of the logistic regression

LogReg <- function(y = c("Category1", "Category2", "Category3"),
                   x = sample(1:100, 12)) {
  # Fit a logistic (binomial, logit-link) regression of y on x and print
  # its summary.  y is recycled against x by data.frame().
  # NOTE(review): with a factor response, glm()'s binomial family models
  # first level vs. the rest, so the 3-level default y is collapsed to a
  # binary outcome — confirm this is intended.
  #
  # Args:
  #   y: discrete response vector
  #   x: continuous numeric predictor vector
  # Returns:
  #   Invisibly, the summary.glm object (also printed).
  dataFrame <- data.frame(xVar = x, yVar = y)

  logRegModel <- glm(yVar ~ xVar,
                     data = dataFrame,
                     family = binomial(link = "logit"))

  print(summary(logRegModel))
}
# Testing default values (x is a random sample, so the summary varies per run)
LogReg()
## 
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"), 
##     data = dataFrame)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5723  -1.1384   0.5962   0.9253   1.2805  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  2.63450    2.01844   1.305    0.192
## xVar        -0.02903    0.02695  -1.077    0.281
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 15.276  on 11  degrees of freedom
## Residual deviance: 13.851  on 10  degrees of freedom
## AIC: 17.851
## 
## Number of Fisher Scoring iterations: 4
# Using made-up data: y has 3 categories, recycled across the 12 x values
y <- c("Low","Medium","High")
x <- c(20,32,41,28,25,18,19,28,30,31,37,42)
LogReg(y=y,x=x)
## 
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"), 
##     data = dataFrame)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9939  -1.0095   0.6626   0.8542   1.1601  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  3.54535    2.81206   1.261    0.207
## xVar        -0.09471    0.08862  -1.069    0.285
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 15.276  on 11  degrees of freedom
## Residual deviance: 13.998  on 10  degrees of freedom
## AIC: 17.998
## 
## Number of Fisher Scoring iterations: 4
#################################################
# Function: GraphLogReg
# graphs the logistic regression
# inputs: continuous numeric vector for x and a discrete vector for y
# outputs: graph of logistic regression


GraphLogReg <- function(y = rbinom(n = 12, size = 1, p = 0.5),
                        x = sample(1:100, 12)) {
  # Scatter plot of a binary (0/1) response against x, overlaid with the
  # fitted logistic-regression probability curve.
  #
  # Args:
  #   y: binary 0/1 response vector
  #   x: continuous numeric predictor vector (same length as y)
  # Returns:
  #   Invisibly, the coordinates plotted by curve() (plotting side effect).
  dataFrame <- data.frame(xVar = x, yVar = y)
  logRegModel <- glm(yVar ~ xVar,
                     data = dataFrame,
                     family = binomial(link = "logit"))

  plot(x = dataFrame$xVar, y = dataFrame$yVar,
       pch = 21, bg = "violet", cex = 2.5)
  # curve() supplies `x` over the plot range; evaluate the fitted
  # probabilities along it
  curve(predict(logRegModel, data.frame(xVar = x), type = "response"),
        add = TRUE, lwd = 2)
}
# Using defaults
GraphLogReg()

# Using made-up data. Bug fix: the original called GraphLogReg(x=x, y=y),
# passing the stale globals from the earlier examples instead of the
# vectors defined on the next two lines.
yVar <- c(0,1,0,0,1,1)
xVar <- c(3,6,2,8,4,3)
GraphLogReg(y=yVar, x=xVar)