Using R for bioassessment data: What, why, and how

relative

# print a message to the console
print("hello world!")
## [1] "hello world!"
# build the sequence of whole numbers from 1 through 10
seq(from = 1, to = 10)
##  [1]  1  2  3  4  5  6  7  8  9 10
# draw 100 random values from a normal distribution (mean 10, sd 2);
# no seed is set, so the values shown below are one example run
rnorm(n = 100, mean = 10, sd = 2)
##   [1] 15.029863  9.504855  8.495452 10.735446 10.147796 11.155330  8.059115
##   [8] 11.183410  7.636009 13.084366  8.176553 11.556673 10.870692 11.791386
##  [15]  9.911749 10.933113  9.625723  8.604161 10.833915 11.185085 14.206069
##  [22]  9.738960  9.776363 10.969604  8.976868  9.456172  8.813325 13.220348
##  [29]  6.255681 11.233555  8.403339  7.312847 10.753612 11.922396  7.476220
##  [36]  9.773417  9.397070  9.161689  9.166348 15.131267  9.604910 10.148325
##  [43]  8.029399 10.743290 10.391792 11.176691  7.758645 11.497596 10.710358
##  [50] 11.530855  6.672026  9.071498 12.772938 10.127421 11.664292  9.122088
##  [57]  7.476628  9.210463  8.123345  9.385807  9.547906 10.980478 10.760303
##  [64] 11.153934  9.599652  9.204581  6.608733  6.840840 10.146724  9.660492
##  [71]  9.772478  7.465127  9.246003 11.814800  9.514882  7.540854  9.893602
##  [78] 10.258499  8.565367 12.852896 12.131330 10.838035 10.210317 12.011242
##  [85]  9.407239 12.972351 10.379250  8.412654  9.893556 13.424042  8.356429
##  [92] 11.928442 10.658048  7.511820 12.253362 10.378984 10.950845  9.504111
##  [99]  6.661446 12.076945
# average of 100 standard-normal draws (functions can be nested)
mean(x = rnorm(n = 100))
## [1] -0.006075047
# sum of 100 standard-normal draws
sum(rnorm(n = 100))
## [1] -7.906235
# store a result in a named object with `<-`, then type the name to show it
my_random_sum <- sum(rnorm(n = 100))
my_random_sum
## [1] -8.787773
# install packages from CRAN, but only those that are not already installed,
# so re-running this script does not download everything again
pkgs <- c("tidyverse", "sf", "mapview", "viridis", "USAboundaries")
# requireNamespace() returns FALSE (instead of erroring) for a missing package
is_installed <- vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
missing_pkgs <- pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# load the packages into the current session
library("tidyverse")
library("sf")
library("mapview")
library("viridis")
library("USAboundaries")
# Getting help from the console with the help command and its shortcuts
# When you know the name of a function
help("print") # Open the help page for the print function
?print # Same help page, requested with the `?` shortcut

# When you know the name of the package
help(package = "sf") # Help on the package `sf`

# When you don't know the exact name, or only know part of it
apropos("print") # Returns all available functions with "print" in the name
??print # search shortcut; also covers demos and vignettes (formatted page)
# numeric (double) vector
dbl_var <- c(1, 2.5, 4.5)
# integer vector; the `L` suffix marks each value as an integer
int_var <- c(1L, 6L, 10L)
# logical vector; `T` and `F` are shorthand for TRUE and FALSE here, but they
# are ordinary variables that can be reassigned, so prefer the full names
log_var <- c(TRUE, FALSE, T, F)
# character vector
chr_var <- c("a", "b", "c")
# class() reports what kind of data a vector holds
class(dbl_var)
## [1] "numeric"
# length() reports how many elements a vector holds
length(log_var)
## [1] 4
# taking the mean of a character vector
# (not meaningful for text: R returns NA and issues a warning)
mean(chr_var)

# adding two numeric vectors of different lengths
# (the shorter vector is recycled; R warns because 4 is not a multiple of 3)
vec1 <- c(1, 2, 3, 4)
vec2 <- c(2, 3, 5)
vec1 + vec2
# build three equal-length vectors, one per column
ltrs <- c("a", "b", "c")
nums <- c(1, 2, 3)
# spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned
logs <- c(TRUE, FALSE, TRUE)
# combine the vectors into a data frame; each vector becomes a named column
mydf <- data.frame(ltrs, nums, logs)
mydf
##   ltrs nums  logs
## 1    a    1  TRUE
## 2    b    2 FALSE
## 3    c    3  TRUE
# import the two example csv files; the paths are relative to the working
# directory. stringsAsFactors = FALSE keeps text columns as character vectors
# (written out in full because `F` is a variable that can be reassigned)
cscidat <- read.csv("data/cscidat.csv", stringsAsFactors = FALSE)
ascidat <- read.csv("data/ascidat.csv", stringsAsFactors = FALSE)
# get the dimensions (number of rows, then number of columns)
dim(cscidat)
## [1] 1613   10
dim(ascidat)
## [1] 2585    3
# get the column names
names(cscidat)
##  [1] "SampleID_old"   "StationCode"    "New_Lat"        "New_Long"      
##  [5] "COMID"          "E"              "OE"             "pMMI"          
##  [9] "CSCI"           "SampleID_old.1"
names(ascidat)
## [1] "id"        "site_type" "ASCI"
# see the first six rows (the default number of rows shown by head)
head(cscidat)
##          SampleID_old StationCode  New_Lat  New_Long    COMID        E
## 1 000CAT148_8.10.10_1   000CAT148 39.07523 -119.8994  8942501 16.05804
## 2 000CAT228_8.10.10_1   000CAT228 39.07307 -119.9201  8942503 16.08960
## 3  102PS0139_8.9.10_1   102PS0139 41.99595 -122.9597 23936337 15.46439
## 4 103CDCHHR_9.14.10_1   103CDCHHR 41.78890 -124.0778 22226836 21.10443
## 5 103FC1106_7.15.14_1   103FC1106 41.93407 -124.1081 22226634 16.83757
## 6 103FCA168_7.24.13_1   103FCA168 41.64962 -124.0912 22226990 19.07408
##          OE      pMMI      CSCI      SampleID_old.1
## 1 0.9309977 1.0449580 0.9879779 000CAT148_8.10.10_1
## 2 0.9726777 0.9896232 0.9811505 000CAT228_8.10.10_1
## 3 1.0896002 1.0535386 1.0715694  102PS0139_8.9.10_1
## 4 1.0898184 1.0834653 1.0866419 103CDCHHR_9.14.10_1
## 5 1.0779468 0.9163731 0.9971599 103FC1106_7.15.14_1
## 6 1.0931064 1.0335179 1.0633122 103FCA168_7.24.13_1
head(ascidat)
##                    id    site_type      ASCI
## 1 000CAT148_8.10.10_1    Reference 1.1950555
## 2 000CAT228_8.10.10_1    Reference 1.1514480
## 3  102PS0139_8.9.10_1 Intermediate 0.9345882
## 4 102PS0177_8.28.12_1    Reference 1.1965128
## 5 102PS0177_8.28.12_2    Reference 1.2091360
## 6 103CDCHHR_9.14.10_1    Reference 0.8369236
# get the overall structure: one line per column giving its name, its type
# (chr, num, int) and its first few values
str(cscidat)
## 'data.frame':    1613 obs. of  10 variables:
##  $ SampleID_old  : chr  "000CAT148_8.10.10_1" "000CAT228_8.10.10_1" "102PS0139_8.9.10_1" "103CDCHHR_9.14.10_1" ...
##  $ StationCode   : chr  "000CAT148" "000CAT228" "102PS0139" "103CDCHHR" ...
##  $ New_Lat       : num  39.1 39.1 42 41.8 41.9 ...
##  $ New_Long      : num  -120 -120 -123 -124 -124 ...
##  $ COMID         : int  8942501 8942503 23936337 22226836 22226634 22226990 22227592 22226948 22226612 22226750 ...
##  $ E             : num  16.1 16.1 15.5 21.1 16.8 ...
##  $ OE            : num  0.931 0.973 1.09 1.09 1.078 ...
##  $ pMMI          : num  1.045 0.99 1.054 1.083 0.916 ...
##  $ CSCI          : num  0.988 0.981 1.072 1.087 0.997 ...
##  $ SampleID_old.1: chr  "000CAT148_8.10.10_1" "000CAT228_8.10.10_1" "102PS0139_8.9.10_1" "103CDCHHR_9.14.10_1" ...
str(ascidat)
## 'data.frame':    2585 obs. of  3 variables:
##  $ id       : chr  "000CAT148_8.10.10_1" "000CAT228_8.10.10_1" "102PS0139_8.9.10_1" "102PS0177_8.28.12_1" ...
##  $ site_type: chr  "Reference" "Reference" "Intermediate" "Reference" ...
##  $ ASCI     : num  1.195 1.151 0.935 1.197 1.209 ...
# open each data frame in the spreadsheet-style data viewer; the viewer needs
# an interactive session, so skip it when the script runs non-interactively
if (interactive()) {
  View(cscidat)
  View(ascidat)
}
# Excel workbooks can be imported with read_excel() from the readxl package;
# the path below is a placeholder for the location of your own file
library(readxl)
dat <- read_excel(path = "location/of/excel/file.xlsx")

Using R for bioassessment data: What, why, and how

Lesson Outline

Lesson Exercises

Goals and Motivation

Why should I invest time in R?

RStudio

Open R and RStudio

RStudio projects

Scripting

Executing code in RStudio

What is the environment?

Exercise 1

R language fundamentals

Packages

CRAN

Installing packages

Exercise 2

Getting Help

Help from the console

Official R Resources

Google and StackOverflow

Other Resources

Data structures in R

Vectors (one-dimensional data)

2-dimensional data

Getting your data into R

Exercise 3

Other ways to import data

Summary