######################################################################
# FUNCTION: fitLinearSummary
# fits a regression line
# inputs: continuous numeric vector for both x and y variables
# outputs: slope, p-value, and R^2 value
fitLinearSummary <- function(y=runif(50),x=runif(50)) {
yVar <-y
xVar <- x
dataFrame <- data.frame(yVar,xVar)
regModel <- lm(yVar~xVar,data=dataFrame)
myOut<- c(slope = summary(regModel)$coefficients[2,1],
pvalue = summary(regModel)$coefficients[2,4],
Rsquared = summary(regModel)$adj.r.squared)
return(myOut)
}
#####################################################################
# Using default settings:
fitLinearSummary() # this returns the slope, pvalue and adjusted R squared value
## slope pvalue Rsquared
## -0.05249248 0.65757687 -0.01661970
# Using made-up data where x = y which would yield an R squared value of 1 (100% of the variance is explained by these variables)
xLin <- seq(1:20)
yLin <- seq(1:20)
fitLinearSummary(yLin,xLin) # as expected, the R squared value is 1; however, there is an error as there is essentially perfect fit
## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable
## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable
## Warning in summary.lm(regModel): essentially perfect fit: summary may be
## unreliable
## slope pvalue Rsquared
## 1.000000e+00 1.278962e-281 1.000000e+00
###########################################################################
# Function: plotLinear
# Plots a scatter plot of data with the R squared value, slope, and pvalue
# Inputs: Continuous numerical vector for both variables
# Outputs: scatter plot with line of best fit
plotLinear <- function(y=runif(50),x=runif(50)) {
yVar <-y
xVar <- x
dataFrame <- data.frame(yVar,xVar)
regModel <- lm(yVar~xVar,data=dataFrame)
# myOut<- c(slope = summary(regModel)$coefficients[2,1],
# pvalue = summary(regModel)$coefficients[2,4],
# Rsquared = summary(regModel)$adj.r.squared)
Rsquared <-c("R-sq=", format(summary(regModel)$adj.r.squared, nsmall=3,digits=3))
plot(y=dataFrame$yVar,x=dataFrame$xVar,pch=21,bg="cornflowerblue",main="Linear Regression", ylab="Y Axis", xlab="X Axis")
legend("topright",bty="n",legend=Rsquared)
abline(regModel,col="blue")
}
####################################################################
# Plotting the default values:
plotLinear()
# Plotting made-up data
XLinPlot <- c(3,5,4,5,2)
YLinPlot <- c(4,6,3,4,1)
plotLinear(YLinPlot,XLinPlot)
# Contingency Table
###################################################
# Function: summaryContTable
# Reports the results from the Chi Squared analysis
# Inputs: discrete numerical vectors for both
# Outputs: the results from the Chi Square analysis
summaryContTable <- function(y=sample(1:100,3),x=sample(1:100,3)) {
yVar <- y
xVar <- x
dataMatrix <- rbind(yVar,xVar)
rownames(dataMatrix) <- c("Row1","Row2")
colnames(dataMatrix) <-c("Column1",
"Column2",
"Column3")
print(chisq.test(dataMatrix))
}
#############################################
# Testing defaults
summaryContTable()
##
## Pearson's Chi-squared test
##
## data: dataMatrix
## X-squared = 40.685, df = 2, p-value = 1.464e-09
# Using made-up data (same as below)
yCont <- c(13,62,14)
xCont <- c(2,72,53)
summaryContTable(yCont,xCont)
##
## Pearson's Chi-squared test
##
## data: dataMatrix
## X-squared = 25.622, df = 2, p-value = 2.73e-06
##############################################################################################
# Function: doContTable
# Makes a contingency table from the data and reports p - value on mosaic plot
# Inputs: discrete numerical vectors for both
# Outputs: a mosaic plot with p-value and a bar plot
doContTable <- function(y=sample(1:100,3),x=sample(1:100,3)) {
yVar <- y
xVar <- x
dataMatrix <- rbind(yVar,xVar)
rownames(dataMatrix) <- c("Row1","Row2")
colnames(dataMatrix) <-c("Column1",
"Column2",
"Column3")
print(chisq.test(dataMatrix))
pvalue <-c("P-value=", format(chisq.test(dataMatrix)$p.value, nsmall=3,digits=3))
mosaicplot(x=dataMatrix,
col=c("goldenrod","grey","mediumorchid1"),
shade=FALSE)
legend("bottomleft",bty="n",legend=pvalue)
par(mfrow=c(2,1))
barplot(height=dataMatrix,
beside=TRUE,
col=c("indianred2","lightslateblue"))
}
#################################################
# Printing default values
doContTable()
##
## Pearson's Chi-squared test
##
## data: dataMatrix
## X-squared = 6.1911, df = 2, p-value = 0.04525
# Using made-up data
yCont <- c(13,62,14)
xCont <- c(2,72,53)
doContTable(yCont,xCont)
##
## Pearson's Chi-squared test
##
## data: dataMatrix
## X-squared = 25.622, df = 2, p-value = 2.73e-06
####################################################################################
# Function: doAnova
# Reports F value (test statistic for the variation between sample means),
# Inputs: continuous numeric vector for x and a discrete vector for y
# Outputs: a mosaic plot with p-value and a bar plot
doAnova <- function(y=sample(1:100,9),x=c("Category1","Category2", "Category3")) {
yVar <- y
xVar <- x
dataFrame <- data.frame(xVar,yVar)
AnovaModel<- aov(yVar~xVar,data=dataFrame)
print(summary(AnovaModel)[[1]][["F value"]][1])
}
#######################################################################
# Printing default values
doAnova()
## [1] 2.63234
# Using made-up data
y <- c(10,2,6,10,12,3,3,18,7)
x <- c("Low","Medium","High")
doAnova(y=y,x=x)
## [1] 0.748062
########################################################################
# Function: GraphAnova
# Graphs box plot with whiskers
# Input: continuous numeric vector for x and discrete vector for y
# Output: box plot with whiskers and F value
GraphAnova <- function(y=sample(1:100,9),x=c("Category1","Category2", "Category3")) {
yVar <- y
xVar <- x
dataFrame <- data.frame(xVar,yVar)
AnovaModel<- aov(yVar~xVar,data=dataFrame)
return(boxplot(yVar~xVar,data=dataFrame,col=c("purple","limegreen","royalblue")))
}
# Graphing default values
GraphAnova()
# Graphing made-up values
y <- c(10,2,6,10,12,3,3,18,7)
x <- c("Low","Medium","High")
GraphAnova(y=y,x=x)
###################################################################
# Function: LogReg
# Completes a logistic regression and outputs summary of logistic regression
# Inputs: continuous numeric vector for x and a discrete vector for y
# Outputs: summary of logisic regression
LogReg <- function(y=c("Category1","Category2","Category3"),x=sample(1:100,12)) {
yVar <- y
xVar <- x
dataFrame <- data.frame(xVar,yVar)
logRegModel <- glm(yVar ~ xVar,
data=dataFrame,
family=binomial(link="logit"))
print(summary(logRegModel))
}
# Testing default values
LogReg()
##
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"),
## data = dataFrame)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5723 -1.1384 0.5962 0.9253 1.2805
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.63450 2.01844 1.305 0.192
## xVar -0.02903 0.02695 -1.077 0.281
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 15.276 on 11 degrees of freedom
## Residual deviance: 13.851 on 10 degrees of freedom
## AIC: 17.851
##
## Number of Fisher Scoring iterations: 4
# Using made-up data
y <- c("Low","Medium","High")
x <- c(20,32,41,28,25,18,19,28,30,31,37,42)
LogReg(y=y,x=x)
##
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"),
## data = dataFrame)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9939 -1.0095 0.6626 0.8542 1.1601
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.54535 2.81206 1.261 0.207
## xVar -0.09471 0.08862 -1.069 0.285
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 15.276 on 11 degrees of freedom
## Residual deviance: 13.998 on 10 degrees of freedom
## AIC: 17.998
##
## Number of Fisher Scoring iterations: 4
#################################################
# Function: GraphLogReg
# graphs the logistic regression
# inputs: continuous numeric vector for x and a discrete vector for y
# outputs: graph of logistic regression
GraphLogReg <- function(y=rbinom(n=12,size=1,p=0.5),x=sample(1:100,12)) {
yVar <- y
xVar <- x
dataFrame <- data.frame(xVar,yVar)
logRegModel <- glm(yVar ~ xVar,
data=dataFrame,
family=binomial(link="logit"))
plot(x=dataFrame$xVar, y=dataFrame$yVar,pch=21,bg="violet",cex=2.5)
curve(predict(logRegModel,data.frame(xVar=x),type="response"),add=TRUE,lwd=2)
}
# Using defaults
GraphLogReg()
# Using made up data
yVar <- c(0,1,0,0,1,1)
xVar <- c(3,6,2,8,4,3)
GraphLogReg(x=x,y=y)