Code
# install.packages(c("tidyverse", "RSQLite", "DBI", "rstudioapi", "DT"))SQLite is the most widely deployed database engine in the world (see SQLite.org). Unlike client-server database management systems (DBMS) like PostgreSQL or Oracle, an SQLite database is a single, self-contained disk file.
From a curation perspective, SQLite is a highly sustainable format. The Library of Congress lists it as a preferred format for datasets due to its open specification and long-term stability. Our goal is to “unbox” the database to ensure all tables are documented and accessible.
“Self-Contained” does not imply “Self-Documenting.” A curator receiving a .db file often faces a Black Box. Without a schema map, relationships between tables (Foreign Keys) can be lost, making the data contextually unusable.
We use RSQLite (Müller et al. 2024) to interact with the database files directly from R.
# install.packages(c("tidyverse", "RSQLite", "DBI", "rstudioapi", "DT"))library(tidyverse)
library(DBI)
library(RSQLite)
library(rstudioapi)
library(DT)if (interactive() && .Platform$OS.type == "windows") {
selected_dir <- rstudioapi::selectDirectory(caption = "Select SQLite Directory")
} else {
selected_dir <- NULL
}
target_dir <- if (!is.null(selected_dir)) selected_dir else params$target_dir
print(paste("Analyzing directory:", target_dir))[1] "Analyzing directory: data/Inspect_sqlite/"
This process iterates through all SQLite files, connects to them, and extracts the structure of every table found.
db_files <- list.files(target_dir, pattern = "\\.(sqlite|db|sqlite3)$", full.names = TRUE, recursive = TRUE)
inspect_sqlite <- function(file_path) {
con <- dbConnect(SQLite(), file_path)
tables <- dbListTables(con)
details <- map_dfr(tables, function(tbl) {
# Get column info
cols <- dbGetQuery(con, paste0("PRAGMA table_info(", tbl, ")"))
# Get row count
row_count <- dbGetQuery(con, paste0("SELECT COUNT(*) as count FROM ", tbl))$count
tibble(
file = basename(file_path),
table_name = tbl,
columns = paste(cols$name, collapse = ", "),
row_count = row_count
)
})
dbDisconnect(con)
return(details)
}
if (length(db_files) > 0) {
db_inventory <- map_dfr(db_files, inspect_sqlite)
datatable(db_inventory, caption = "Inventory of Tables and Row Counts")
} else {
message("No SQLite files found.")
}We save the database manifest to support long-term preservation documentation.
if (exists("db_inventory")) {
output_dir <- "Results/Inspect_sqlite"
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
timestamp <- format(Sys.Date(), "%Y%m%d")
write.csv(db_inventory, file.path(output_dir, paste0("Database_Manifest_", timestamp, ".csv")), row.names = FALSE)
message("Report saved to: ", output_dir)
}