WELCOME TO SOFTWARE CARPENTRY UNSW 2014.02.13 Day 2

http://rspb.royalsocietypublishing.org/content/281/1778/20132570/suppl/DC1

Like yesterday, LUNCH provided downstairs in the courtyard!!!

We'll be working with the same data as yesterday:
https://raw.github.com/nicercode/gapminder/master/gapminder-FiveYearData.csv

You can check you have it with this command (from RStudio)
file.exists("gapminder-FiveYearData.csv")

rm(list=ls())

# Two functions from yesterday that we'll use this morning:

# Sample variance of a numeric vector x, using the (n - 1) denominator.
variance <- function(x) {
  mean_x <- mean(x)
  n <- length(x)
  1/(n - 1) * sum((x - mean_x)^2)
}

# Linearly map x so that min(x) lands on range[[1]] and max(x) on range[[2]].
# Note: if every value of x is identical, max(x) - min(x) is 0 and the
# result is NaN.
linear.rescale <- function(x, range) {
  p <- (x - min(x)) / (max(x) - min(x))
  range[[1]] + p * (range[[2]] - range[[1]])
}

# Data that we will use.
data <- read.csv("gapminder-FiveYearData.csv", stringsAsFactors=FALSE)
data.1982 <- data[data$year == 1982,]

library(testthat)
test_file("test-swc.R")

# Put this in test-swc.R
set.seed(1)
x <- runif(10)
expect_that(variance(x), equals(var(x)))

----- more notes from Day 1 below here ----

# NOTE(review): colour.by.category() and col.table are not defined anywhere in
# these notes -- presumably they come from the Day 1 session; load them first.
col <- colour.by.category(data.1982$continent, col.table)
cex <- linear.rescale(sqrt(data.1982$pop), range=c(0.2, 10))
plot(lifeExp ~ gdpPercap, data.1982, log="x", cex=cex, col=col)

# Regress d[[y]] on log10(d[[x]]) and draw the fitted line on the current plot.
#   x, y : column names (strings) in the data frame d
#   d    : data frame holding both columns
#   ...  : extra graphical parameters (e.g. col) passed on to abline();
#          my.plot() below relies on this to colour each continent's line.
add.trend.line <- function(x, y, d, ...) {
  fit <- lm(d[[y]] ~ log10(d[[x]]))
  abline(fit, ...)
}
add.trend.line("gdpPercap", "lifeExp", data.1982)

plot(lifeExp ~ gdpPercap, data.1982, log="x", cex=cex, col=col)
for (continent in unique(data.1982$continent)) {
  add.trend.line("gdpPercap", "lifeExp",
                 data.1982[data.1982$continent == continent,])
}

# Plot lifeExp against gdpPercap (log x axis) for one year, with point size
# scaled by sqrt(pop), and add one trend line per continent.
#   year : value matched against data$year
#   data : data frame with year, continent, pop, lifeExp and gdpPercap columns
#   cols : colour lookup indexed by continent name (used as cols[[continent]])
my.plot <- function(year, data, cols) {
  data.year <- data[data$year == year,]
  col <- colour.by.category(data.year$continent, cols)
  cex <- linear.rescale(sqrt(data.year$pop), range=c(0.2, 10))
  plot(lifeExp ~ gdpPercap, data.year, log="x", cex=cex, col=col)
  for (continent in unique(data.year$continent)) {
    add.trend.line("gdpPercap", "lifeExp",
                   d=data.year[data.year$continent == continent,],
                   col=cols[[continent]])
  }
}

for (i in unique(data$year)) {
  my.plot(i, data, col.table)
}

---------------PLYR---------------------------
rm(list=ls())
data <- read.csv("gapminder-FiveYearData.csv", stringsAsFactors=FALSE)
library(plyr)

# Number of distinct values in the country column of data frame x.
get.n.countries <- function(x){
  a <- unique(x$country)
  length(a)
}
# The data frame read above is called `data` (there is no object `dat`).
get.n.countries(data)

data.asia <- data[data$continent == "Asia",]
asia.n <- get.n.countries(data.asia)

# Fit lifeExp ~ log10(gdpPercap) on data frame x and return a named numeric
# vector: n (number of rows), r2 (R-squared), a (intercept), b (slope).
model <- function(x){
  fit <- lm(lifeExp ~ log10(gdpPercap), data=x)
  c(n=length(x$lifeExp),
    r2=summary(fit)$r.squared,
    a=coef(fit)[[1]],
    b=coef(fit)[[2]])
}
# One row of model() output per continent/year combination.
out <- ddply(data, .(continent,year), model)

##ALTERNATIVE PROJECT SET-UPS
http://carlboettiger.info/2012/05/06/research-workflow.html
http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1000424

UNZIP this
https://www.dropbox.com/s/wadwoyxvup9a1fc/shell_exercise_files.zip

## Quick exercise 01
Change into your home directory;
Then go to the directory where you saved your project setup yesterday;
Then into `data`;
List the contents of this directory;
Then change back into your home again.

## Short exercise 02
Go to your home directory;
Create a new directory called `shell_exercise_2`;
Download this zip file here: https://www.dropbox.com/s/ccrb1qrmsb7hm6n/shell_exercise_2.zip
And unzip it to the `shell_exercise_2` directory (make sure you remove the .zip version afterwards);
Then create sub-directories in `shell_exercise_2`: `data`, `docs`, `output/data`, `output/figures` and `R`;
Then move the respective file types into their matching directory type following yesterday's project setup;
Go back to the `shell_exercise_2` directory;
Rename (it's the same as mv command) it to `clean-folder`.

Hints: You can combine several steps into one. Tab completion and wildcards are your friends.
mkdir mv wrong.name right.name cp mkdir -p shell_exercise_2/{R,data,outuput/{data,figures},docs} mkdir -p R,data,output/{data,figures},docs mv ls * This worked for me on windows/git mkdir -p shell_exercise2/{'data','docs','output'/{'data','figures'}} Links with keyboard shortcuts for bash http://en.wikipedia.org/wiki/Bash_%28Unix_shell%29 http://www.skorks.com/2009/09/bash-shortcuts-for-maximum-productivity/ ###########GIT############### git config --global user.name "Daniel Falster" git config --global user.email "daniel.falster@mq.edu.au" git config --global color.ui "auto" git config --list git config --global core.autocrlf "true" #windows git config --global core.autocrlf "input" #mac If you get an error in windows saying terminal not fully functional type the following into shell: TERM=msys git add day1.R git commit -m "added code from first day" git add day2.R git commit -m "added code from second day" git log touch .gitignore #open .gitignore in Rstduio and add these lines .Rdata .Rhistory .DS_Store git add .gitignore git commit -m "Ignore R files that we don't want to track" git add data git commit -m "Added data" git status #EXERCISE 1. Commit an R file to your project 2. Make some changes to the R file 3. Use git status to see if file has been modified (it should be) 4. Use git diff to look at and review changes 5. Use git add and git commit to commit the changes 6. 
Use git status to check that file no longer listed as modified

# Draw a filled pink heart on a fresh plot.
# The outline is a parametric curve evaluated at t = 0, 0.1, 0.2, ... up to
# 2*pi. Takes no arguments; called for its plotting side effect.
draw.heart <- function() {
  t <- seq(0, 2 * pi, by = 0.1)
  data <- data.frame(
    t = t,
    y = 13 * cos(t) - 5 * cos(2 * t) - 2 * cos(3 * t) - cos(4 * t),
    x = 16 * sin(t)^3
  )
  # Set up empty axes first (type "n" draws no points), then fill the shape.
  plot(data$x, data$y, type = "n")
  polygon(data$x, data$y, col = "hotpink")
}

# Draw the heart and write a Valentine's greeting for `name` across it.
#   name : text appended after "Happy Valentine's Day"
happy.v.day <- function(name) {
  draw.heart()
  greeting <- paste("Happy Valentine's Day", name)
  text(x = 0, y = 7, labels = greeting, col = "red", cex = 2.5)
}

https://github.com/richfitz/gapminder

### Reproducible research ###########
library(plyr)
source("R/figures_functions.R")

data <- read.csv("data/gapminder-FiveYearData.csv", stringsAsFactors = FALSE)

# For each year, fit linear model to life expectancy vs gdp by continent
fitted.linear.model <- dlply(
  data,
  .(continent, year),
  function(x) lm(lifeExp ~ log10(gdpPercap), data = x)
)

# Summarise one fitted lm object as a one-row data frame:
#   r2 : R-squared of the fit
#   n  : number of lifeExp values in the model frame
#   a  : first coefficient (intercept)
#   b  : second coefficient (slope on log10(gdpPercap))
model.summary <- function(x) {
  coefs <- coef(x)
  data.frame(
    r2 = summary(x)$r.squared,
    n = length(x$model$lifeExp),
    a = coefs[[1]],
    b = coefs[[2]]
  )
}

# Apply model.summary to every fitted model and stack the rows.
model.data <- ldply(fitted.linear.model, model.summary)