maintenance-guide.Rmd
For: Package maintainers and developers
Purpose: Guide for maintaining, updating, and extending the ideadata package
# Install development tools
install.packages(c(
"devtools", # Package development
"usethis", # Package development helpers
"testthat", # Testing framework
"roxygen2", # Documentation generation
"pkgdown", # Website generation
"styler", # Code formatting
"lintr" # Code linting
))
ideadata/
├── DESCRIPTION # Package metadata and dependencies
├── NAMESPACE # Auto-generated from roxygen
├── NEWS.md # Changelog
├── README.md # Package overview
├── ideadata.Rproj # RStudio project file
│
├── R/ # R source code
│ ├── connections.R # Connection management
│ ├── get_table.R # Core table retrieval
│ ├── collector.R # Large data collection
│ ├── kinit.R # Kerberos authentication
│ ├── utils.R # Utilities & credential setup
│ ├── get_PROD1.R # PowerSchool functions
│ ├── get_PROD2.R # Assessment functions
│ ├── get_*.R # Other data-specific functions
│ ├── warehouse_meta_data.R # Metadata handling
│ ├── zzz.R # Package startup hooks
│ └── view_connection_pane.R # RStudio integration
│
├── man/ # Auto-generated documentation
│ └── *.Rd # Help files (don't edit manually)
│
├── vignettes/ # Long-form documentation
│ ├── setup-creds.Rmd # Credential setup guide
│ ├── using-get-table.Rmd # get_table() guide
│ └── using-collector.Rmd # collector() guide
│
├── tests/ # Test suite
│ ├── testthat.R # Test runner
│ └── testthat/ # Test files
│ └── test-*.R # Individual test files
│
├── docs/ # pkgdown website
│ └── ... # Generated by pkgdown
│
└── _pkgdown.yml # pkgdown configuration
# 1. Load package for development
devtools::load_all()
# 2. Make changes to R code
# 3. Test your changes
devtools::test() # Run all tests
devtools::test_active_file() # Test current file only
# 4. Update documentation
devtools::document()
# 5. Check package
devtools::check()
# 6. Install locally to test
devtools::install()
# 7. Build website (if documentation changed)
pkgdown::build_site()
# Create feature branch
git checkout -b feature/new-function-name
# Make changes, commit regularly
git add R/new_function.R
git commit -m "Add get_new_data() function"
# Push to GitHub
git push origin feature/new-function-name
# Create pull request on GitHub
# After review and approval, merge to main
get_*() Function
This is the most common pattern for adding data access functions.
# In R/get_MYDATA.R
#' Get My New Data
#'
#' Builds a query against the `MyTableName` table of the `MyDatabaseName`
#' database by delegating to `get_table()`. Nothing is collected here; the
#' caller decides when to pull rows.
#'
#' @return A lazy `tbl_sql` object. Use `dplyr::collect()` to execute the query.
#' @export
#' @family data retrieval functions
#'
#' @examples
#' \dontrun{
#' # Pull every row
#' my_data <- get_my_data() %>% collect()
#'
#' # Narrow the query first, then pull
#' filtered <- get_my_data() %>%
#'   filter(year >= 2020) %>%
#'   collect()
#' }
get_my_data <- function() {
  # All arguments are passed by name, so their order is irrelevant.
  get_table(
    .server_name = "RGVPDSD-DWPRD1", # or appropriate server
    .database_name = "MyDatabaseName",
    .table_name = "MyTableName"
  )
}
# Roxygen tags needed:
#' @export # Make function available to users
#' @family group_name # Group in documentation
#' @examples # Usage examples
#' @param param_name # Parameter descriptions (if any)
#' @return # What the function returns
# In tests/testthat/test-get_my_data.R
test_that("get_my_data() returns a lazy query", {
  # Skip (rather than fail) when no `conn_MyDatabaseName` object exists,
  # e.g. on a machine without warehouse credentials.
  skip_if_not(exists("conn_MyDatabaseName"))

  result <- get_my_data()

  # expect_s3_class() tests inheritance and prints the actual class vector on
  # failure; the previous `expect_true(... %in% class(result))` could only
  # report "is not TRUE".
  expect_s3_class(result, "tbl_sql")
  expect_s3_class(result, "tbl_lazy")
})
test_that("get_my_data() can be collected", {
skip_if_not(exists("conn_MyDatabaseName"))
result <- get_my_data() %>%
head() %>%
collect()
expect_s3_class(result, "tbl_df")
})
For related functions (questions, results, responses, etc.):
#' @name aees_functions
#' @title AEES Survey Data Functions
#' @description Accessors for Annual Employee Experience Survey (AEES) data.
#'   Each function forwards one table of the `AEES` database to `get_table()`.
NULL

#' Get AEES Questions
#' @rdname aees_functions
#' @export
get_aees_questions <- function() {
  # Table "Questions" in database "AEES".
  get_table(
    .database_name = "AEES",
    .table_name = "Questions"
  )
}

#' Get AEES Results
#' @rdname aees_functions
#' @export
get_aees_results <- function() {
  # Table "Results" in database "AEES".
  get_table(
    .database_name = "AEES",
    .table_name = "Results"
  )
}

#' Get AEES Open-Ended Responses
#' @rdname aees_functions
#' @export
get_aees_oe_responses <- function() {
  # Table "OE_Responses" in database "AEES".
  get_table(
    .database_name = "AEES",
    .table_name = "OE_Responses"
  )
}
#' Get Historical Data for Date Range
#'
#' Queries the `HistoricalData` table of the `Archive` database and keeps rows
#' whose `date` lies between `start_date` and `end_date` (both bounds
#' inclusive). When `school_ids` is supplied, rows are additionally limited to
#' those whose `school_id` is one of the given values.
#'
#' @param start_date Start date (character or Date)
#' @param end_date End date (character or Date)
#' @param school_ids Optional vector of school IDs to filter; `NULL` (the
#'   default) applies no school filter
#'
#' @return A lazy `tbl_sql` object
#' @export
#'
#' @examples
#' \dontrun{
#' # Every school, calendar year 2024
#' data <- get_historical_data("2024-01-01", "2024-12-31") %>%
#'   collect()
#'
#' # Two schools only
#' data <- get_historical_data(
#'   "2024-01-01",
#'   "2024-12-31",
#'   school_ids = c(12345, 67890)
#' ) %>%
#'   collect()
#' }
get_historical_data <- function(start_date, end_date, school_ids = NULL) {
  archive_tbl <- get_table(
    .database_name = "Archive",
    .table_name = "HistoricalData"
  )

  # `!!` injects the argument values so they are not read as column names.
  in_window <- filter(archive_tbl, date >= !!start_date, date <= !!end_date)

  # No school filter requested: the date-bounded query is the result.
  if (is.null(school_ids)) {
    return(in_window)
  }

  filter(in_window, school_id %in% !!school_ids)
}
# tests/testthat/test-connections.R - Connection tests
# tests/testthat/test-get_table.R - Core function tests
# tests/testthat/test-collector.R - Collector tests
# tests/testthat/test-get_students.R - Student data tests
# etc.
# Basic test structure
# Template in Arrange / Act / Assert form. `my_function` and `expected_output`
# are placeholders that are not defined in this snippet; substitute the
# function under test and the value it is expected to produce.
test_that("function does what it should", {
# Arrange
input <- "test_value"
# Act
result <- my_function(input)
# Assert
expect_equal(result, expected_output)
})
# Skip tests if connection unavailable
test_that("get_students() works", {
skip_if_not(exists("conn_PowerSchool"))
students <- get_students()
# Test lazy object returned
expect_s3_class(students, "tbl_sql")
# Test collection works
sample <- students %>% head(10) %>% collect()
expect_s3_class(sample, "tbl_df")
expect_true(nrow(sample) <= 10)
})
Every exported function needs roxygen documentation:
#' Brief Title
#'
#' Longer description explaining what the function does,
#' when to use it, and any important details.
#'
#' @param param1 Description of first parameter
#' @param param2 Description of second parameter
#'
#' @return Description of what is returned
#'
#' @export
#' @family function_group
#'
#' @examples
#' \dontrun{
#' # Example 1
#' result <- my_function("input")
#'
#' # Example 2
#' result <- my_function("input") %>%
#' filter(x > 10) %>%
#' collect()
#' }
my_function <- function(param1, param2) {
# function body
}
@export - Make function available to users
@param name description - Document parameters
@return description - Document return value
@examples - Usage examples (use \dontrun{} for code requiring authentication)
@family group_name - Group related functions
@seealso - Link to related functions
@importFrom pkg function - Import specific function from package
# Generate man/*.Rd files from roxygen comments
devtools::document()
# Preview help file
?my_function
Create long-form documentation:
# Create new vignette
usethis::use_vignette("my-new-guide")
Edit the created .Rmd file in vignettes/, then:
# Build vignettes
devtools::build_vignettes()
# Preview
devtools::build_rmd("vignettes/my-new-guide.Rmd")
The package website is built with pkgdown.
# Build entire site
pkgdown::build_site()
# Just update reference
pkgdown::build_reference()
# Just update articles
pkgdown::build_articles()
_pkgdown.yml
url: https://idea-analytics.github.io/ideadata
template:
bootstrap: 5
reference:
- title: "Connection Functions"
desc: "Functions for managing database connections"
contents:
- create_connection
- check_get_connection
- title: "PowerSchool Data"
desc: "Functions for accessing PowerSchool data"
contents:
- get_students
- get_schools
- get_student_daily_attendance
Use Semantic Versioning:
MAJOR.MINOR.PATCH
Edit DESCRIPTION:
Version: 4.1.0
Update NEWS.md:
# ideadata 4.1.0
## New Features
* Added `get_new_data()` function for accessing new data source
* Added support for filtering by region in `get_students()`
## Bug Fixes
* Fixed issue with date filtering in `get_attendance()`
* Corrected column naming in `collector()`
## Internal
* Updated tests for dbplyr 2.5.0 compatibility
* Improved error messages for connection failures
# 1. Update version in DESCRIPTION
# Version: 4.1.0
# 2. Update NEWS.md with changes
# 3. Run full check
devtools::check()
# 4. Run tests
devtools::test()
# 5. Update documentation
devtools::document()
# 6. Build website
pkgdown::build_site()
# 7. Check reverse dependencies (if any)
# devtools::revdep_check() is no longer part of devtools (removed in 2.0.0);
# reverse-dependency checks now live in the revdepcheck package, which is
# installed from GitHub: remotes::install_github("r-lib/revdepcheck")
revdepcheck::revdep_check()
# 8. Spell check
spelling::spell_check_package()
When a dependency updates (e.g., dbplyr, dplyr):
Test compatibility:
# Install new version
install.packages("dbplyr")
# Run full tests
devtools::test()
devtools::check()
Update DESCRIPTION if minimum version required:
Imports:
dbplyr (>= 2.5.0),
dplyr (>= 1.1.0)
Update NEWS.md:
When IDEA adds a new SQL Server:
No code changes needed! The metadata system handles this automatically.
Verify metadata updates:
Create convenience functions if needed:
get_new_server_data <- function() {
get_table(
.table_name = "TableName",
.database_name = "DatabaseName",
.server_name = "NEW-SERVER-NAME"
)
}
If table schema changes:
Document in NEWS.md:
Consider wrapper functions to maintain backwards compatibility:
Communicate to users via email/Slack about changes
The metadata is fetched on package load. To manually refresh:
# Current approach: restart R
.rs.restartR()
library(ideadata)
# Future enhancement: could add refresh function
# Sketch of a manual refresh: calls get_warehouse_meta_data() and stores the
# result in `warehouse_meta_data` with `<<-`, i.e. in the nearest enclosing
# environment that already has that binding, or in the global environment if
# none does.
# NOTE(review): if `warehouse_meta_data` is an object in the package
# namespace, its binding is locked once the package has loaded and `<<-` would
# stop with "cannot change value of locked binding" - confirm where the
# metadata is stored before adopting this; keeping it inside a package-level
# environment object is the usual way to allow updates.
refresh_metadata <- function() {
warehouse_meta_data <<- get_warehouse_meta_data()
}
Cause: Tests run in clean environment without database credentials.
Solution: Tests automatically skip if connection unavailable:
test_that("my test", {
skip_if_not(exists("conn_PowerSchool"))
# test code
})
check() fails with “Object not found”
Cause: Missing import or incorrect NAMESPACE.
Solution:
# Re-generate NAMESPACE
devtools::document()
# Check imports in function files
#' @importFrom dplyr filter select mutate
Cause: Often due to examples that require authentication.
Solution: Wrap examples in
\dontrun{}:
#' @examples
#' \dontrun{
#' data <- get_students() %>% collect()
#' }
Cause: Credentials not set or ticket expired.
Solution:
# Check credentials
Sys.getenv("IDEA_RNA_DB_UID")
Sys.getenv("IDEA_RNA_DB_PWD")
# Re-run setup
setup_creds()
.rs.restartR()
# Use styler to format code
styler::style_pkg()
# Use lintr to check code
lintr::lint_package()
# Function names: lowercase with underscores
get_student_data <- function() { }
# Variables: lowercase with underscores
my_variable <- 10
# Constants: uppercase with underscores
MY_CONSTANT <- "value"
# Private functions: prefix with dot
.helper_function <- function() { }
# Always return lazy queries
get_something <- function() {
  # The get_table() result is handed back as-is; nothing is collected here.
  get_table(
    .table_name = "Something"
  )
}
# Use !! for parameter values in dplyr verbs
get_filtered <- function(filter_value) {
  data_tbl <- get_table("Data")
  # `!!` injects the argument's value, so `filter_value` is not looked up as a
  # column of the table.
  filter(data_tbl, column == !!filter_value)
}
# Document lazy evaluation in return
#' @return A lazy `tbl_sql` object. Use `dplyr::collect()` to pull data.
Consider setting up GitHub Actions for:
# .github/workflows/R-CMD-check.yaml
# Runs R CMD check on every push and pull request.
name: R-CMD-check
on: [push, pull_request]
jobs:
  check:
    runs-on: ubuntu-latest
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    steps:
      # checkout@v2 runs on a retired Node runtime; use a maintained release.
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true
      # Installs the packages listed in DESCRIPTION plus the check tooling.
      # The earlier `remotes::install_deps()` step never installed devtools,
      # so the following `devtools::check()` call had nothing to run.
      - name: Install dependencies
        uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::rcmdcheck
          needs: check
      - name: Check
        uses: r-lib/actions/check-r-package@v2

Note: Tests requiring database access would need to be skipped in CI.