# GoogleSheet and GoogleDrive locations
# shared to shares@iea-uploader.iam.gserviceaccount.com
gs_url <- "https://docs.google.com/spreadsheets/d/1F8H2UFcajLVqq_MIPS0YAUt3ZnSSJP7cZ1hxCsMLW4g/edit"
gd_url <- "https://drive.google.com/drive/u/2/folders/1seUbRmpwqhOyTjuWIndBql-m6Z0wvjxx"
# password authorization file for shares@iea-uploader.iam.gserviceaccount.com
gs_json <- "/Volumes/GoogleDrive/My Drive/projects/iea-auto/data/iea-uploader-27c589771060.json"
stopifnot(file.exists(gs_json))
gs4_auth(path = gs_json)
drive_auth(path = gs_json)
doc <- read_sheet(gs_url, "Documentation", skip = 2)
meta <- read_sheet(gs_url, "Indicator Metadata")
sheet_names(gs_url)
## [1] "Views" "Documentation" "Indicator Metadata"
## [4] "files_PI" "files_unmatched"
# "Views" "Documentation" "Indicator Metadata"
To test uploading experience in Meta-app.
data_files <- drive_ls(gd_url, recursive = FALSE) %>%
mutate(
url = glue("https://drive.google.com/file/d/{id}/edit"),
gd_file = glue("<a href='{url}' target='_blank'>{name}</id>"))
data_files %>%
select(gd_file) %>%
datatable(
escape=F,
extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
pageLength = nrow(data_files),
lengthMenu = c(5, 20, 50, nrow(data_files))))
Summary of indicator name in PI file
in Indicator Metadata tab of ERDDAP_CCIEA_database_2021.20210317 - Google Sheet and if match in data/ files above in gd_data
.
meta_files <- meta %>%
filter(!is.na(`PI filename`)) %>%
group_by(`PI filename`, `ERDDAP Dataset ID`) %>%
summarize(
vars_erddap = paste(`Variable Name/ERDDAP`, collapse = ", "),
.groups = "drop") %>%
mutate(
name = basename(`PI filename`)) %>%
left_join(
data_files %>%
select(name, url, gd_file), by = "name")
meta_files %>%
select(name, `PI filename`, `ERDDAP Dataset ID`, gd_file) %>%
arrange(gd_file, name, `ERDDAP Dataset ID`) %>%
datatable(
escape=F,
extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
pageLength = nrow(meta_files),
lengthMenu = c(5, 20, 50, nrow(meta_files))))
meta_files %>%
select(gd_file, name, url, `PI filename`, `ERDDAP Dataset ID`, `vars_erddap`) %>%
arrange(gd_file, name) %>%
mutate(
gd_file = ifelse(
!is.na(url),
glue('=HYPERLINK("{url}", "{name}")'),
NA) %>% gs4_formula) %>%
select(-url) %>%
write_sheet(gs_url, "files_PI")
Files found in Ben’s ca / data but not matched with PI files (indicator name in PI file
).
files_unmatched <- data_files %>%
anti_join(meta_files, by="name")
files_unmatched %>%
select(gd_file) %>%
datatable(
escape=F,
extensions = 'Buttons',
options = list(
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
pageLength = nrow(files_unmatched),
lengthMenu = c(5, 20, 50, nrow(files_unmatched))))
files_unmatched %>%
mutate(
gd_file = glue('=HYPERLINK("{url}", "{name}")') %>%
gs4_formula) %>%
select(gd_file) %>%
write_sheet(gs_url, "files_unmatched")
datatable(doc)
datatable(meta)
providers <- meta %>%
select(
pi = PI,
email = Contact) %>%
group_by_all() %>%
summarize(.groups = "drop")
write_csv(providers, "data/providers.csv")
datatable(providers)
institutions <- meta %>%
select(
institution = Institution) %>%
group_by_all() %>%
summarize(.groups = "drop")
write_csv(institutions, "data/institutions.csv")
datatable(institutions)
components <- meta %>%
select(
component = `Component Section`,
subcomponent = Subcomponent) %>%
group_by_all() %>%
summarize(.groups = "drop")
write_csv(components, "data/components.csv")
datatable(components)
datasets <- meta %>%
select(
dataset_id = `ERDDAP Dataset ID`) %>%
group_by_all() %>%
summarize(.groups = "drop")
write_csv(datasets, "data/datasets.csv")
datatable(datasets)
timeseries <- meta %>%
select(
timeseries_id = `CCIEA timeseries ID`,
dataset_id = `ERDDAP Dataset ID`,
institution = Institution) %>%
group_by_all() %>%
summarize(.groups = "drop")
write_csv(timeseries, "data/timeseries.csv")
datatable(timeseries)
regions <- meta %>%
select(
region = region) %>%
group_by_all() %>%
summarize(.groups = "drop")
write_csv(regions, "data/regions.csv")
datatable(regions)
vars <- meta %>%
select(
var_id = `Variable Name/ERDDAP`,
var_label = `Y-axis label (long variable name)`,
var_units = `Units`) %>%
group_by_all() %>%
summarize(.groups = "drop")
write_csv(vars, "data/vars.csv")
datatable(vars)
dataset_vars <- meta %>%
select(
dataset_id = `ERDDAP Dataset ID`,
var_id = `Variable Name/ERDDAP`,
fld_orig = `indicator name in PI file`,
pi = PI,
region, latitude, longitude,
institution = Institution,
sampling_freq = `Sampling frequency`,
sci_name = `Scientific name`,
component = `Component Section`,
subcomponent = Subcomponent) %>%
mutate(
across(where(is.list), as.character)) %>%
group_by_all() %>%
summarize(.groups = "drop")
write_csv(dataset_vars, "data/dataset_vars.csv")
datatable(dataset_vars)