Read online table

Download table (‘*.csv’)

# set variables
csv_url <- "https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_AC.csv"
dir_data <- "data"

# derived variables: local path named after the last component of the URL
csv <- file.path(dir_data, basename(csv_url))

# create directory only when it is missing, so re-running this chunk does not
# emit the "'data' already exists" warning
if (!dir.exists(dir_data)) {
  dir.create(dir_data)
}

# download file
download.file(csv_url, csv)

Read table ‘read.csv()’

# attempt to read csv: the first line supplies the column names, but the line
# directly below it is parsed as a data row
d <- read.csv(csv)

# keep the column names from this first attempt so the file does not have to
# be parsed a third time just to recover them
col_names <- names(d)

# show the data frame
#d
# read csv by skipping first two lines, so no header
d <- read.csv(csv, skip = 2, header = FALSE)
#d

# update data frame to original column names
names(d) <- col_names
#d

Show table ‘DT::datatable()’

# show table: call the function through its namespace, which works without
# attaching the package
DT::datatable(data = d)

# attach DT so the same function can be called by its bare name
library(DT)
datatable(data = d)

Wrangle data

Manipulate with ‘dplyr’

library(DT)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
d <- d %>%
  # coerce the data frame to a tibble (as_tibble() is the coercion function;
  # tibble() is meant for building one from column vectors)
  as_tibble() %>%
  # mutate time: keep its first 10 characters and parse them as a Date
  # (presumably the YYYY-MM-DD part of a longer timestamp -- confirm)
  mutate(
    time = as.Date(substr(time, 1, 10))) %>%
  # select columns: time plus every total_fisheries_revenue* column
  select(time, starts_with("total_fisheries_revenue")) %>%
  # filter rows: keep 1981-01-01 and later
  filter(time >= as.Date("1981-01-01"))

datatable(d)

Tidy with ‘tidyr’

library(tidyr)

# reshape from wide to long: every column except time is stacked into a
# (name, value) pair, giving one row per time and original column
d <- d %>%
  pivot_longer(
    cols = -time,
    names_to = "name",
    values_to = "value"
  )

datatable(d)

Summarize with ‘dplyr’

library(stringr)

# derive a region label by removing the shared prefix from the former column
# names, then keep only the columns used from here on
d <- d %>%
  mutate(
    region = str_remove(name, "total_fisheries_revenue_")
  ) %>%
  select(time, region, value)
datatable(d)

# mean of value within each region
d_sum <- d %>%
  group_by(region) %>%
  summarise(avg_revenue = mean(value))

# display the summary with avg_revenue formatted as currency
datatable(d_sum) %>%
  formatCurrency(columns = "avg_revenue")

Apply functions with ‘purrr’ on a ‘nest’ed ‘tibble’

library(purrr)

# one row per region, with that region's (time, value) rows packed into the
# list-column `data`
n <- d %>%
  group_by(region) %>%
  nest(data = c(time, value))
n
## # A tibble: 4 x 2
## # Groups:   region [4]
##   region    data             
##   <chr>     <list>           
## 1 ca        <tibble [40 x 2]>
## 2 coastwide <tibble [40 x 2]>
## 3 or        <tibble [40 x 2]>
## 4 wa        <tibble [40 x 2]>
# fit a linear model of value on time for each region's nested table
n <- n %>%
  mutate(lm = map(data, function(tbl) lm(value ~ time, tbl)))

# pull the estimated slope on time out of each model's coefficient table
n <- n %>%
  mutate(trend = map_dbl(lm, function(fit) {
    coef(summary(fit))["time", "Estimate"]
  }))
n
## # A tibble: 4 x 4
## # Groups:   region [4]
##   region    data              lm        trend
##   <chr>     <list>            <list>    <dbl>
## 1 ca        <tibble [40 x 2]> <lm>   -0.0117 
## 2 coastwide <tibble [40 x 2]> <lm>   -0.00454
## 3 or        <tibble [40 x 2]> <lm>    0.00379
## 4 wa        <tibble [40 x 2]> <lm>    0.00333
# show only the region and its fitted trend
n %>%
  select(region, trend) %>%
  datatable()