Read online table
Download table (‘*.csv’)
# set variables: remote ERDDAP table and the local folder to store it in
csv_url <- "https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_AC.csv"
dir_data <- "data"
# derived variables: local path that reuses the remote file name
csv <- file.path(dir_data, basename(csv_url))
# create the directory only when it is missing, so re-running the script
# does not raise the "'data' already exists" warning
if (!dir.exists(dir_data)) {
  dir.create(dir_data, recursive = TRUE)
}
# download file
download.file(csv_url, csv)
Read table ‘read.csv()’
# first attempt: read the csv as-is (the header row supplies the column names)
d <- read.csv(csv)
# show the data frame
#d
# remember the column names from this first read so the file does not have to
# be parsed a third time just to recover them
col_names <- names(d)
# read csv by skipping first two lines, so no header
# NOTE(review): the second skipped line is presumably a units row, as in
# ERDDAP csv output; confirm against the downloaded file
d <- read.csv(csv, skip = 2, header = FALSE)
#d
# update data frame to original column names
names(d) <- col_names
#d
Show table ‘DT::datatable()’
## render the data frame as an interactive table, calling DT by namespace
DT::datatable(data = d)
## attach DT so datatable() can be called without the DT:: prefix
library(DT)
datatable(data = d)
Wrangle data
Manipulate with ‘dplyr’
library(DT)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
d <- d %>%
  # convert the existing data frame to a tibble; as_tibble() is the converter,
  # whereas tibble() is meant for building one from column vectors
  as_tibble() %>%
  # keep only the first 10 characters of the timestamp and parse them as a Date
  mutate(time = as.Date(substr(time, 1, 10))) %>%
  # select the time column plus every total fisheries revenue column
  select(time, starts_with("total_fisheries_revenue")) %>%
  # filter rows to 1981 onward
  filter(time >= as.Date("1981-01-01"))
datatable(d)
Tidy with ‘tidyr’
library(tidyr)
# reshape from wide to long: every column except time becomes a name/value pair
d <- pivot_longer(d, cols = -time)
datatable(data = d)
Summarize with ‘dplyr’
library(stringr)
# derive the region code by stripping the shared column-name prefix
d <- mutate(
  d,
  region = str_replace(
    string = name,
    pattern = "total_fisheries_revenue_",
    replacement = ""
  )
)
# keep only the columns used downstream, in this order
d <- select(d, time, region, value)
datatable(data = d)
# average revenue per region
d_sum <- d %>%
  group_by(region) %>%
  summarise(avg_revenue = mean(x = value))
# show the averages with the revenue column formatted as currency
formatCurrency(datatable(data = d_sum), columns = "avg_revenue")
Apply functions with ‘purrr’ on a ‘nest’ed ‘tibble’
library(purrr)
# nest the time/value pairs into one list-column row per region; naming the
# nested columns explicitly makes a preceding group_by() redundant and keeps
# the result ungrouped for the steps below
n <- d %>%
  nest(data = c(time, value))
n
## # A tibble: 4 x 2
## region data
## <chr> <list>
## 1 ca <tibble [40 x 2]>
## 2 coastwide <tibble [40 x 2]>
## 3 or <tibble [40 x 2]>
## 4 wa <tibble [40 x 2]>
# fit a linear model of value on time for each region, then pull out the slope
n <- n %>%
  mutate(
    # the lambda argument is named region_data so it does not shadow the
    # outer data frame d
    lm = map(data, function(region_data) {
      lm(value ~ time, data = region_data)
    }),
    # coef() already holds the slope estimate; no need to build summary()
    # NOTE(review): time is a Date upstream, so the slope is presumably in
    # value units per day; confirm before reporting
    trend = map_dbl(lm, function(model) {
      coef(model)[["time"]]
    })
  )
n
## # A tibble: 4 x 4
## region data lm trend
## <chr> <list> <list> <dbl>
## 1 ca <tibble [40 x 2]> <lm> -0.0117
## 2 coastwide <tibble [40 x 2]> <lm> -0.00454
## 3 or <tibble [40 x 2]> <lm> 0.00379
## 4 wa <tibble [40 x 2]> <lm> 0.00333
# show the per-region trend
n %>%
  select(region, trend) %>%
  datatable()