Nice R Code

Punning code better since 2013

A script without using functions

The challenge: The Pantheria dataset is a species-level database of life history, ecology, and geography of extant and recently extinct mammals. The script below downloads the data and makes plots of some variables against each other, highlighting different orders in the plots.

The Problem: This script is provided as an example of messy code. It is typical of the code written by many biologists (including our past selves). Some problems with it include:

  • It is difficult to read - what is it doing?
  • There are large amounts of repeated code.
  • When we want to change something, we need to do it in many places.
  • The code adds lots of objects to the workspace. These are difficult to keep track of and make bugs more likely.

The solution: Look at the nice code here.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
## NOTE: the script deliberately does not wipe the workspace on startup.
## Every object used below is assigned before it is read, so an empty
## workspace is not required, and rm(list=ls()) would silently delete the
## caller's objects whenever this script is source()d into a live session.

## Load the data
## The table is downloaded once and cached as "pantheria.txt" in the
## working directory; later runs read the cached copy instead.
url <-
  "http://esapubs.org/archive/ecol/E090/184/PanTHERIA_1-0_WR93_Aug2008.txt"
if ( !file.exists("pantheria.txt") ) {
  download.file(url, "pantheria.txt")
}

## Tab-separated file with a header row.  check.names=FALSE keeps the
## column headers exactly as written in the file (they are not valid R
## names, e.g. "5-1_AdultBodyMass_g"), as.is=TRUE keeps text columns as
## character vectors, and the string "-999" is read as NA.
data.all <- read.table("pantheria.txt", as.is=TRUE,
                       header=TRUE, sep="\t", check.names=FALSE,
                       na.strings="-999")

## Columns of interest.  Each entry maps a short name (left) to the
## original column header in the downloaded table (right).
cols <- c(
  Order             = "MSW93_Order",
  Family            = "MSW93_Family",
  Genus             = "MSW93_Genus",
  Species           = "MSW93_Species",
  Mass              = "5-1_AdultBodyMass_g",
  Length            = "13-1_AdultHeadBodyLen_mm",
  Longevity         = "17-1_MaxLongevity_m",
  Metabolic.rate    = "5-2_BasalMetRateMass_g",
  Gestation.length  = "9-1_GestationLen_d",
  HomeRange         = "22-1_HomeRange_km2",
  OffspringSize     = "5-3_NeonateBodyMass_g",
  PopulationDensity = "21-1_PopulationDensity_n/km2",
  AgeAtMaturation   = "23-1_SexualMaturityAge_d"
)

## Select the columns by their original headers and relabel them with the
## short names in a single step.
data <- setNames(data.all[unname(cols)], names(cols))

## Longevity is given in months; rescale it to years
data[["Longevity"]] <- data[["Longevity"]] / 12

## Longevity against body mass on log-log axes, all species in grey.
## The default x axis is switched off (xaxt="n") because a custom axis
## with 10^k labels is drawn just below; leaving it on draws two sets of
## tick labels on top of each other.
plot(Longevity ~ Mass, data, log="xy", col="#66666666", pch=19,
     xlab="Body mass (g)", ylab="Longevity (years)", las=1, xaxt="n")

## Add nice log-10 labels to the plot.
## On a log axis par("usr") holds the limits as log10 values, so the
## integers spanning usr[1:2] are the exponents of the powers of ten to
## label.  Positions outside the plot region are not drawn by axis().
usr <- par("usr")
r <- round(usr[1:2])
at <- seq(r[1], r[2])
lab <- do.call(expression, lapply(at, function(i) bquote(10^.(i))))
axis(1, at=10^at, labels=lab)

## Note: we use log10 here because that's what the plot uses (not
## natural logs)
fit <- lm(log10(Longevity) ~ log10(Mass), data)
abline(fit, col="#66666666")

## Add focus group of points for Carnivora.
## %in% is used rather than == so that a missing Order gives FALSE
## instead of NA (an NA index would add an all-NA row to the subset).
data.order <- data[data$Order %in% "Carnivora",]
fit <- lm(log10(Longevity) ~ log10(Mass), data.order)
points(Longevity ~ Mass, data.order, pch=19, col="#ff000066")
abline(fit, col="#ff000066")

title(main="Carnivora", line=1)

## Longevity against body mass on log-log axes, all species in grey.
## xaxt="n" suppresses the default x axis: the custom 10^k axis added
## below would otherwise be printed over the default tick labels.
plot(Longevity ~ Mass, data, log="xy", col="#66666666", pch=19,
     xlab="Body mass (g)", ylab="Longevity (years)", las=1, xaxt="n")

## Add nice log-10 labels to the plot.
## par("usr") is in log10 units on a log axis, so the whole numbers
## between its x limits are the exponents to label.
usr <- par("usr")
r <- round(usr[1:2])
at <- seq(r[1], r[2])
lab <- do.call(expression, lapply(at, function(i) bquote(10^.(i))))
axis(1, at=10^at, labels=lab)

## Note: we use log10 here because that's what the plot uses (not
## natural logs)
fit <- lm(log10(Longevity) ~ log10(Mass), data)
abline(fit, col="#66666666")

## Add focus group of points for Chiroptera.
## %in% never returns NA, so rows with a missing Order are simply left
## out instead of turning into all-NA rows.
data.order <- data[data$Order %in% "Chiroptera",]
fit <- lm(log10(Longevity) ~ log10(Mass), data.order)
points(Longevity ~ Mass, data.order, pch=19, col="#ff000066")
abline(fit, col="#ff000066")

title(main="Chiroptera", line=1)


## Repeat for another variable pair

## Gestation length against body mass on log-log axes, all species in
## grey.  The y label is written as plain text ("Gestation length")
## rather than the column name, matching the other axis labels.
## xaxt="n" suppresses the default x axis so that the custom 10^k axis
## added below is not drawn over a second set of tick labels.
plot(Gestation.length ~ Mass, data, log="xy", col="#66666666", pch=19,
     xlab="Body mass (g)", ylab="Gestation length (d)", las=1, xaxt="n")

## Add nice log-10 labels to the plot.
## par("usr") is in log10 units on a log axis, so the whole numbers
## between its x limits are the exponents to label.
usr <- par("usr")
r <- round(usr[1:2])
at <- seq(r[1], r[2])
lab <- do.call(expression, lapply(at, function(i) bquote(10^.(i))))
axis(1, at=10^at, labels=lab)

## Note: we use log10 here because that's what the plot uses (not
## natural logs)
fit <- lm(log10(Gestation.length) ~ log10(Mass), data)
abline(fit, col="#66666666")

## Add focus group of points for Carnivora.
## %in% gives FALSE (not NA) for a missing Order, so no all-NA rows are
## pulled into the subset.
data.order <- data[data$Order %in% "Carnivora",]
fit <- lm(log10(Gestation.length) ~ log10(Mass), data.order)
points(Gestation.length ~ Mass, data.order, pch=19, col="#ff000066")
abline(fit, col="#ff000066")

title(main="Carnivora", line=1)

## Gestation length against body mass on log-log axes, all species in
## grey.  The y label uses plain text instead of the column name so it
## reads like the other axis labels.  The default x axis is turned off
## (xaxt="n"); the 10^k axis below replaces it, and drawing both would
## overprint the tick labels.
plot(Gestation.length ~ Mass, data, log="xy", col="#66666666", pch=19,
     xlab="Body mass (g)", ylab="Gestation length (d)", las=1, xaxt="n")

## Add nice log-10 labels to the plot.
## On a log axis the limits in par("usr") are log10 values; the integers
## they span are used as exponents for the tick labels.
usr <- par("usr")
r <- round(usr[1:2])
at <- seq(r[1], r[2])
lab <- do.call(expression, lapply(at, function(i) bquote(10^.(i))))
axis(1, at=10^at, labels=lab)

## Note: we use log10 here because that's what the plot uses (not
## natural logs)
fit <- lm(log10(Gestation.length) ~ log10(Mass), data)
abline(fit, col="#66666666")

## Add focus group of points for Chiroptera.
## Matching with %in% keeps rows with a missing Order out of the subset
## (== would index with NA and add all-NA rows).
data.order <- data[data$Order %in% "Chiroptera",]
fit <- lm(log10(Gestation.length) ~ log10(Mass), data.order)
points(Gestation.length ~ Mass, data.order, pch=19, col="#ff000066")
abline(fit, col="#ff000066")

title(main="Chiroptera", line=1)


## And another variable pair

## Offspring size against body mass on log-log axes, all species in grey.
## The default x axis is suppressed with xaxt="n" because the custom
## axis drawn below would otherwise sit on top of its tick labels.
plot(OffspringSize ~ Mass, data, log="xy", col="#66666666", pch=19,
     xlab="Body mass (g)", ylab="Offspring size (g)", las=1, xaxt="n")

## Add nice log-10 labels to the plot.
## par("usr") reports log10 limits on a log axis, so each integer in
## that range is an exponent k labelled as 10^k.
usr <- par("usr")
r <- round(usr[1:2])
at <- seq(r[1], r[2])
lab <- do.call(expression, lapply(at, function(i) bquote(10^.(i))))
axis(1, at=10^at, labels=lab)

## Note: we use log10 here because that's what the plot uses (not
## natural logs)
fit <- lm(log10(OffspringSize) ~ log10(Mass), data)
abline(fit, col="#66666666")

## Add focus group of points for Carnivora.
## %in% is NA-safe: a missing Order is treated as "not this order"
## rather than producing an NA index.
data.order <- data[data$Order %in% "Carnivora",]
fit <- lm(log10(OffspringSize) ~ log10(Mass), data.order)
points(OffspringSize ~ Mass, data.order, pch=19, col="#ff000066")
abline(fit, col="#ff000066")

title(main="Carnivora", line=1)

## Offspring size against body mass on log-log axes, all species in grey.
## xaxt="n" keeps plot() from drawing its own x axis; the 10^k axis
## added below is the only one wanted, and two axes would overprint.
plot(OffspringSize ~ Mass, data, log="xy", col="#66666666", pch=19,
     xlab="Body mass (g)", ylab="Offspring size (g)", las=1, xaxt="n")

## Add nice log-10 labels to the plot.
## With a log x axis, par("usr")[1:2] are the limits in log10 units;
## the integers between them become the exponents on the tick labels.
usr <- par("usr")
r <- round(usr[1:2])
at <- seq(r[1], r[2])
lab <- do.call(expression, lapply(at, function(i) bquote(10^.(i))))
axis(1, at=10^at, labels=lab)

## Note: we use log10 here because that's what the plot uses (not
## natural logs)
fit <- lm(log10(OffspringSize) ~ log10(Mass), data)
abline(fit, col="#66666666")

## Add focus group of points for Chiroptera.
## %in% returns FALSE for a missing Order, so the subset never picks up
## the all-NA rows that indexing with an NA from == would create.
data.order <- data[data$Order %in% "Chiroptera",]
fit <- lm(log10(OffspringSize) ~ log10(Mass), data.order)
points(OffspringSize ~ Mass, data.order, pch=19, col="#ff000066")
abline(fit, col="#ff000066")

title(main="Chiroptera", line=1)

Download this script here

Comments