Introduction to R

# print a character string to the console
print("hello world!")
## [1] "hello world!"
# sequence of whole numbers from 1 to 10
seq(from = 1, to = 10)
##  [1]  1  2  3  4  5  6  7  8  9 10
# 100 random draws from a normal distribution with mean 10 and sd 2
rnorm(n = 100, mean = 10, sd = 2)
##   [1]  8.531921 11.905716 11.251795  8.438629  9.450414  8.486819 10.243004
##   [8] 12.274041 10.197025  9.520608 10.810758  9.679102  9.111759  6.899313
##  [15] 10.932673 10.462370  6.201299 10.577593  6.452287  8.660075 12.135389
##  [22]  8.532985  9.595898 11.824422 10.982700  8.463049 10.586093 13.374349
##  [29] 10.224231  8.950736 10.206238  8.819724 10.955189 11.149782 10.684488
##  [36] 10.969980  9.696980 11.157332 12.117536  9.367518 12.061794  9.272101
##  [43]  9.030856 11.227226 10.945037  8.741399 10.144285 11.247540 11.113333
##  [50]  9.996750 10.336736 11.890317 10.467867 10.887259 13.292540  8.956129
##  [57] 12.559586  9.987806  9.995999 10.420017 11.839114 13.113262 13.162324
##  [64] 12.080590  8.458970 10.615528  6.484628 11.538441  5.998108 11.641810
##  [71]  7.355314 12.231942 13.406046  8.572844  6.931587  9.148484 10.668048
##  [78] 10.992527  9.660655  8.273125  9.018200  8.453254 10.173665 10.900872
##  [85]  8.383527 14.767823  6.640291  9.962048 12.812723  6.779957 12.000798
##  [92] 11.496873 11.315262 10.184519 10.602588  9.054141 10.566485  7.039364
##  [99]  8.111413  9.423223
# average of 100 draws from the standard normal (rnorm defaults: mean 0, sd 1)
# NOTE: no seed is set in the visible script, so values will differ from the
# echoed output on each run
mean(rnorm(100))
## [1] -0.04246624
# sum of a fresh set of 100 standard normal draws
sum(rnorm(100))
## [1] -2.929205
# store the result in a variable with `<-`; an assignment prints nothing
my_random_sum <- sum(rnorm(100))
# typing the variable name prints the stored value
my_random_sum
## [1] 6.390332
# `=` also assigns at the top level, but `<-` is the conventional operator;
# this overwrites my_random_sum with the sum of a new set of random draws
my_random_sum = sum(rnorm(100))
# 100 draws from a normal distribution with mean 10 and standard deviation 1
rnorm(100, mean = 10, sd = 1)
# Installing packages from CRAN
# Install ggplot2 and dplyr, one call per package
install.packages(pkgs = "ggplot2")
install.packages(pkgs = "dplyr")

# Several packages can be installed in one call by passing a character vector
install.packages(pkgs = c("quickmapr", "formatR"))
# Loading packages from your library
# Attach the installed packages to the current R session
library(ggplot2)
library(dplyr)

# You can also access functions without loading by using package::function
# (typing the name without parentheses prints the function definition)
dplyr::mutate
## function (.data, ...) 
## {
##     UseMethod("mutate")
## }
## <bytecode: 0x00000000182037a0>
## <environment: namespace:dplyr>
# show the library directories where R looks for installed packages
.libPaths()
## [1] "C:/Users/Marcus.SCCWRP2K/R/win-library/3.5"
## [2] "C:/Program Files/R/R-3.5.1/library"
# check if a package is installed
# installed.packages() returns a character matrix with one row per package,
# named by package. Match against the row names only: matching against the
# whole matrix also searches fields such as Imports and Depends, so a package
# whose only import is "dplyr" would give a false positive.
"dplyr" %in% rownames(installed.packages())

# check if it's loaded
# loadedNamespaces() lists every package namespace loaded in this session
"dplyr" %in% loadedNamespaces()
# sessionInfo() prints the full picture: R version, attached packages, and
# packages loaded via a namespace but not attached
sessionInfo()
# Using the help command/shortcut
# When you know the name of a function
help("print") # Help on the print command
?print # Help on the print command using the `?` shortcut

# When you know the name of the package
help(package = "dplyr") # Help on the package `dplyr`

# Don't know the exact name or just part of it
apropos("print") # Returns names of objects on the search path containing "print"
??print # `??` is a shortcut for help.search(); also searches demos and vignettes
# atomic vectors of four common types, each built with c()
dbl_var <- c(1, 2.5, 4.5)
# the L suffix makes the values integers rather than doubles
int_var <- c(1L, 6L, 10L)
# T and F are shorthand for TRUE and FALSE, but unlike TRUE/FALSE they are
# ordinary variables that can be reassigned, so prefer the full names
log_var <- c(TRUE, FALSE, T, F)
chr_var <- c("a", "b", "c")
# class() reports the type of the vector
class(dbl_var)
## [1] "numeric"
# length() reports the number of elements
length(log_var)
## [1] 4
# taking the mean of a character vector
# (mean() is not defined for character data: it returns NA with a warning)
mean(chr_var)

# adding two numeric vectors of different lengths
# (the shorter vector is recycled; because 4 is not a multiple of 3, R also
# issues a warning)
vec1 <- c(1, 2, 3, 4)
vec2 <- c(2, 3, 5)
vec1 + vec2
# combining a character and numeric
# (the number is coerced to character so all elements share one type)
c('a', 1)
## [1] "a" "1"
# character matrix: the 26 lowercase letters arranged in 13 columns
mymat <- matrix(data = letters, ncol = 13)
dim(mymat)
## [1]  2 13
# numeric matrix: the integers 1 to 12 arranged in 4 columns (replaces mymat)
mymat <- matrix(data = 1:12, ncol = 4)
dim(mymat)
## [1] 3 4
# build a data frame from three equal-length vectors; each vector becomes a
# column named after the variable that held it
ltrs <- c("a", "b", "c")
nums <- c(1, 2, 3)
# spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned,
# which would silently change the values stored here
logs <- c(TRUE, FALSE, TRUE)
mydf <- data.frame(ltrs, nums, logs)
mydf
##   ltrs nums  logs
## 1    a    1  TRUE
## 2    b    2 FALSE
## 3    c    3  TRUE
# column names
names(mydf)
## [1] "ltrs" "nums" "logs"
# row names (default to the row numbers, stored as character strings)
row.names(mydf)
## [1] "1" "2" "3"
# a list can hold elements of different types and lengths
ltrs <- letters[1:3]
nums <- c(1, 2, 3, 4)
myls <- list(ltrs, nums)
myls
## [[1]]
## [1] "a" "b" "c"
## 
## [[2]]
## [1] 1 2 3 4
# install the readxl package and attach it for reading Excel files
install.packages("readxl")
library("readxl")
# import the spreadsheet (the path is relative to the working directory)
dat <- read_excel(path = "data/mpg.xlsx")
# list the objects currently defined in the environment
ls()
##  [1] "all_pipes"       "alldat"          "bad_ex"         
##  [4] "bad_ex2"         "bsmap"           "by_make"        
##  [7] "by_yr_drv"       "chemdat"         "chr_var"        
## [10] "cols"            "dat"             "dat_ext"        
## [13] "datchem"         "datlocs"         "dbl_var"        
## [16] "displ_cat"       "dplyr_arr"       "dplyr_good_fuel"
## [19] "dplyr_mut"       "dplyr_mut2"      "dplyr_rnm"      
## [22] "dplyr_sel1"      "dplyr_sel2"      "dplyr_sel3"     
## [25] "dplyr_six_cyl"   "filt1"           "filt2"          
## [28] "filt3"           "filt4"           "filt5"          
## [31] "good_ex"         "int_var"         "log_var"        
## [34] "logs"            "ltrs"            "metadat"        
## [37] "more_sums"       "mpg"             "my_random_sum"  
## [40] "mydf"            "myls"            "mymat"          
## [43] "nums"            "origin"          "p"              
## [46] "pkgs"            "polys"           "prm"            
## [49] "sedchem"         "stations"        "sumdat"         
## [52] "tidydat"         "x"
# check the class of the imported object (read_excel returned a tibble here)
class(dat)
## [1] "tbl_df"     "tbl"        "data.frame"
# preview the first rows
head(dat)
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans drv     cty   hwy fl    class
##   <chr>        <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 audi         a4     1.80 1999.    4. auto~ f       18.   29. p     comp~
## 2 audi         a4     1.80 1999.    4. manu~ f       21.   29. p     comp~
## 3 audi         a4     2.00 2008.    4. manu~ f       20.   31. p     comp~
## 4 audi         a4     2.00 2008.    4. auto~ f       21.   30. p     comp~
## 5 audi         a4     2.80 1999.    6. auto~ f       16.   26. p     comp~
## 6 audi         a4     2.80 1999.    6. manu~ f       18.   26. p     comp~
# convert to a plain data frame; the tibble classes are dropped
dat <- as.data.frame(dat)
class(dat)
## [1] "data.frame"
# get the dimensions (number of rows, then number of columns)
dim(dat)
## [1] 234  11
# get the column names
names(dat)
##  [1] "manufacturer" "model"        "displ"        "year"        
##  [5] "cyl"          "trans"        "drv"          "cty"         
##  [9] "hwy"          "fl"           "class"
# get the row names
row.names(dat)
##   [1] "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"   "9"   "10"  "11" 
##  [12] "12"  "13"  "14"  "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22" 
##  [23] "23"  "24"  "25"  "26"  "27"  "28"  "29"  "30"  "31"  "32"  "33" 
##  [34] "34"  "35"  "36"  "37"  "38"  "39"  "40"  "41"  "42"  "43"  "44" 
##  [45] "45"  "46"  "47"  "48"  "49"  "50"  "51"  "52"  "53"  "54"  "55" 
##  [56] "56"  "57"  "58"  "59"  "60"  "61"  "62"  "63"  "64"  "65"  "66" 
##  [67] "67"  "68"  "69"  "70"  "71"  "72"  "73"  "74"  "75"  "76"  "77" 
##  [78] "78"  "79"  "80"  "81"  "82"  "83"  "84"  "85"  "86"  "87"  "88" 
##  [89] "89"  "90"  "91"  "92"  "93"  "94"  "95"  "96"  "97"  "98"  "99" 
## [100] "100" "101" "102" "103" "104" "105" "106" "107" "108" "109" "110"
## [111] "111" "112" "113" "114" "115" "116" "117" "118" "119" "120" "121"
## [122] "122" "123" "124" "125" "126" "127" "128" "129" "130" "131" "132"
## [133] "133" "134" "135" "136" "137" "138" "139" "140" "141" "142" "143"
## [144] "144" "145" "146" "147" "148" "149" "150" "151" "152" "153" "154"
## [155] "155" "156" "157" "158" "159" "160" "161" "162" "163" "164" "165"
## [166] "166" "167" "168" "169" "170" "171" "172" "173" "174" "175" "176"
## [177] "177" "178" "179" "180" "181" "182" "183" "184" "185" "186" "187"
## [188] "188" "189" "190" "191" "192" "193" "194" "195" "196" "197" "198"
## [199] "199" "200" "201" "202" "203" "204" "205" "206" "207" "208" "209"
## [210] "210" "211" "212" "213" "214" "215" "216" "217" "218" "219" "220"
## [221] "221" "222" "223" "224" "225" "226" "227" "228" "229" "230" "231"
## [232] "232" "233" "234"
# get the overall structure: dimensions plus each column's name, type, and
# first few values
str(dat)
## 'data.frame':    234 obs. of  11 variables:
##  $ manufacturer: chr  "audi" "audi" "audi" "audi" ...
##  $ model       : chr  "a4" "a4" "a4" "a4" ...
##  $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : num  1999 1999 2008 2008 1999 ...
##  $ cyl         : num  4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr  "f" "f" "f" "f" ...
##  $ cty         : num  18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : num  29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr  "p" "p" "p" "p" ...
##  $ class       : chr  "compact" "compact" "compact" "compact" ...
# Different ways to pull out the "fl" column (the 10th column of dat).
# dat is a plain data frame at this point, so:
# by name with $ -- returns a vector
dat$fl
# $ also accepts a quoted name -- returns a vector
dat$"fl"
# single bracket by name -- returns a one-column data frame
dat["fl"]
# row, column indexing by name -- returns a vector
dat[,"fl"]
# double bracket by name -- returns a vector
dat[["fl"]]
# single bracket by position -- returns a one-column data frame
dat[10]
# row, column indexing by position -- returns a vector
dat[,10]
# double bracket by position -- returns a vector
dat[[10]]
# install and attach readr, then save the data frame as a CSV file
# (the path is relative to the working directory)
install.packages("readr")
library("readr")
write_csv(dat, "data/new_mpg.csv")

Introduction to R

Lesson Outline

Lesson Exercises

Goals and Motivation

Why should I invest time in R?

RStudio

Open R and RStudio

RStudio projects

Scripting

Executing code in RStudio

What is the environment?

Exercise 1

R language fundamentals

A few side notes

Packages

CRAN

Installing packages

Using packages

Getting Help

Help from the console

Official R Resources

Google and StackOverflow

Data structures in R

Vectors (one-dimensional data)

Coercion

2-dimensional data

Data I/O

Input

Output

Exercise 2

Retention

Attribution