### This script was written by Dr. Heather Merk and accompanies the
### "Introduction to R Statistical Software: Application to Plant Breeding"
### webinar presented by Dr. Merk in September 2011.
###
### The script is meant to be stepped through interactively. It reads the
### 2009 and 2010 Ohio color sample data sets, explores them, fits fixed- and
### random-effects models for average green, exports the combined table and
### finishes with small for/while loop examples.

## Location of the data files ----
# Edit these paths to point at the folder that holds the webinar data files.
# The Windows path is used on a PC, the other path on a Mac (or other Unix).
data_dir <- if (.Platform$OS.type == "windows") {
  "C:/Users/merk.9/Desktop/Documents/2011 Webinar Series"
} else {
  "/Users/heathermerk/Documents/eXtension/2011 Webinar Series"
}

## Read in a .csv dataset ----
OHColor <- read.csv(file.path(data_dir, "2009OHColorSample.csv"), header = TRUE)

## Read in a .txt dataset ----
# read.table() is the general reader for delimited text files
# (there is no read.txt() function in R).
OHColor <- read.table(file.path(data_dir, "2009OHColorSample.txt"),
                      header = TRUE)

## Check that data was imported successfully ----
# For small data sets
OHColor
# Alternative for small data sets
print(OHColor)
# Alternative for small data sets
summary(OHColor)
# For large data sets
# To see the first rows of data including header
head(OHColor)
# To see the last rows of data
tail(OHColor)
# To see the structure of the data set including the number of observations,
# the number of variables, the variable names, the number of levels of each
# variable
str(OHColor)

## Create a histogram for a numeric variable ----
hist(OHColor$Param1)

## Create a histogram for a numeric variable with custom axes, color ----
# probability = TRUE draws the histogram on the density scale so that the
# kernel density curve added by lines() shares the same y axis; the y axis
# therefore shows density, not a count of fruit.
hist(OHColor$Avggreen,
     probability = TRUE,
     xlab = "Average Green",
     ylab = "Density",
     main = "Average Green Histogram for OHIO 2009 Processing",
     col = 3)
lines(density(OHColor$Avggreen, na.rm = TRUE, bw = 2))

## Test for differences in average green between lines ----
# Create a linear model
fit1 <- lm(formula = OHColor$Avggreen ~ as.factor(OHColor$Line))
anova(fit1)
summary(fit1)

## Simplifying the linear model ----
# Rename variables for ease of use
LINE <- as.factor(OHColor$Line)
AVGGREEN <- OHColor$Avggreen
# Simplified model
fit1a <- lm(AVGGREEN ~ LINE)
anova(fit1a)

## Check assumptions for ANOVA - plots ----
plot(fit1a)

## Calculate mean for a numeric variable and ignore missing data ----
mean(AVGGREEN, na.rm = TRUE)

## Calculate standard deviation for a numeric variable and ignore missing data
sd(AVGGREEN, na.rm = TRUE)

## Calculate mean by rep for a numeric variable ----
# na.rm = TRUE is forwarded by tapply() to mean()
tapply(AVGGREEN, as.factor(OHColor$Rep), mean, na.rm = TRUE)

## Does a line have a higher average green value than the overall mean? ----
# t-test requires that we have numeric vectors
# AVGGREEN is already a numeric vector
# create numeric vector with all average green values for a line
sct0006 <- OHColor[OHColor$Line == "SCT_0006", "Avggreen"]
# check vector
sct0006
# t-test
t.test(sct0006, AVGGREEN, alternative = "greater", var.equal = TRUE)

## Create a boxplot to look at average green by line ----
boxplot(AVGGREEN ~ LINE)

## Working with multi year data ----
# Import Dataset with Ohio color data from 2010
OHColor2010 <- read.csv(file.path(data_dir, "2010OHColorSample.csv"),
                        header = TRUE)
# Check that data was imported correctly
str(OHColor2010)

## Combine data from both years ----
CombinedColor <- rbind(OHColor, OHColor2010)
head(CombinedColor)
tail(CombinedColor)
str(CombinedColor)

# Rename variables so that rep and year are factors and for ease of use
LINE <- as.factor(CombinedColor$Line)
REP <- as.factor(CombinedColor$Rep)
YEAR <- as.factor(CombinedColor$Year)
AVGGREEN <- as.numeric(CombinedColor$Avggreen)

# Create and test model
# In an lm() formula, REP %in% YEAR means "rep nested within year".
fit2 <- lm(AVGGREEN ~ LINE + YEAR + REP %in% YEAR + LINE:YEAR)
anova(fit2)

## Estimating variance components ----
# This requires the lme4 package. The first time you want to use this package,
# you need to install it (using the GUI interface or install.packages("lme4")).
# Load the lme4 package
library(lme4)
# Create model
# Inside a random-effects term the grouping expression is evaluated as
# ordinary R code, so %in% would be the value-matching operator rather than
# formula nesting; rep within year is therefore written as REP:YEAR.
fit3 <- lmer(AVGGREEN ~ (1 | LINE) + (1 | YEAR) + (1 | REP:YEAR) +
               (1 | LINE:YEAR))
summary(fit3)

## Exporting a table ----
# col.names = NA writes an empty header cell above the row names so the
# header lines up with the data columns.
write.table(CombinedColor,
            file = file.path(data_dir, "CombinedColorData.txt"),
            col.names = NA)

## for loop ----
# A single-statement body can be written without braces ...
for (x in seq_len(10)) print(sqrt(x))

# ... but braces are required as soon as the body has several statements.
for (x in seq_len(10)) {
  print(sqrt(x))
}

## while loop ----
# Fibonacci series where each number is the sum of the previous two numbers
# (0,1,1,2,3...). Prints every term below 50.
a <- 0
b <- 1
print(a)
while (b < 50) {
  print(b)
  temp <- a + b
  a <- b
  b <- temp
}