Data Organization
Overview
Organizing data for a three-part joint model can be complex. This script walks through DataStack construction where a DataStack is a list object that holds all data for model fitting. Because this is a three-part model, DataStacks will be created for each submodel (e.g., an ILI submodel, a NREVSS submodel, and a HHS submodel) then the three submodel DataStacks will be combined as a joint_stack. As part of DataStack construction, a three-part response/dependent variable will be specified as a three-column matrix.
Note: This script requires that data Preprocessing has already been completed and all Preprocessing objects are available in the working environment.
ILI DataStack
The ILI model will occupy the base level of the joint model and pass information to both the middle level submodel (NREVSS) and the top level HHS submodel.
Hide code
<- as.data.frame(ilinet_full) %>%
ilinet_full mutate(y_ili = unweighted.s, #Response variable, logit transformed percentage of influenza-like illnesses.
l.pop.ili = log(population), #log population size
Region.1.ili = Region, #an identifier for each location (i.e., States and Territories)
Region.2.ili = Region,
Region.3.ili = Region,
Region.4.ili = Region,
ts_week.1.ili = ts_week, #integer time step for epiweeks 2010 to 2023-epiweek-22
ts_week.2.ili = ts_week,
ts_week.3.ili = ts_week,
ts_week.4.ili = ts_week,
state.ili = abbreviation, #State abbreviations
year.ili = as.integer(as.factor(year)), #integer time step for years
year.ili.1 = as.integer(as.factor(year)),
year.ili.2 = as.integer(as.factor(year)),
year.ili.3 = as.integer(as.factor(year)),
source = 1) #just an ID to identify ILI data
= list(list(intercept1 = rep(1, dim(ilinet_full)[1])), #custom intercept
ili.lst list(l.pop.ili = ilinet_full[,"l.pop.ili"], #log-population
year.ili = ilinet_full[,"year.ili"],
en_est.s = ilinet_full[,"en_est.s"], #amplitude of temporal trend in FluSurv cases
year.ili.1 = ilinet_full[,"year.ili.1"],
year.ili.2 = ilinet_full[,"year.ili.2"],
year.ili.3 = ilinet_full[,"year.ili.3"],
ts_week = ilinet_full[,"ts_week.1.ili"],
ts_week.fs = ilinet_full[,"ts_week.1.ili"],
ts_week.iid = ilinet_full[,"ts_week.1.ili"],
ts_week.1.ili = ilinet_full[,"ts_week.1.ili"],
ts_week.2.ili = ilinet_full[,"ts_week.2.ili"],
ts_week.3.ili = ilinet_full[,"ts_week.3.ili"],
Region = ilinet_full[,"Region.1.ili"],
Region.1 = ilinet_full[,"Region.1.ili"],
Region.2 = ilinet_full[,"Region.1.ili"],
Region.cross = ilinet_full[,"Region.1.ili"],
Region.1.ili = ilinet_full[,"Region.1.ili"],
Region.2.ili = ilinet_full[,"Region.2.ili"],
Region.3.ili = ilinet_full[,"Region.3.ili"],
Region.4.ili = ilinet_full[,"Region.4.ili"],
st_int.ili = ilinet_full[,"st_int"], #State-Time interactions
state.ili = ilinet_full[,"state.ili"],
dsource = ilinet_full[,"source"]))
# The ILI DataStack
= inla.stack(data = list(Y = cbind(ilinet_full$y_ili, NA, NA)), #base level response in 1st slot, NAs are placeholders for other two
ili.stk A = list(1,1), #optional matrix object, not used in this analysis
effects = ili.lst, # list object created above with indices and variables
tag = "ili") #arbitrary name/label to pull data later
NREVSS Set
Similar to ILI DataStack above, creating one for the NREVSS submodel. Note that some variable names are the same as in the ILI DataStack (e.g., Region and st_int) BUT some are specific to this DataStack with a .nv designation: If a variable name is included in the joint model formula, it can only contribute to the submodels that also have it. This provides control such that some variables may be specific to one submodel or one DataStack, whereas other variables may be applicable to all submodels concurrently.
Hide code
<- as.data.frame(nrevss_full) %>%
nrevss_full mutate(y_nrvs = Apos.s, #confirmed positive influenza A, logit transformed percentage
l.pop.nv = log(population),
Region.1.nv = Region,
Region.2.nv = Region,
Region.3.nv = Region,
Region.4.nv = Region,
ts_week.1.nv = ts_week,
ts_week.2.nv = ts_week,
ts_week.3.nv = ts_week,
ts_week.4.nv = ts_week,
state.nv = abbreviation,
year.nv = as.integer(as.factor(year)),
year.nv.1 = as.integer(as.factor(year)),
year.nv.2 = as.integer(as.factor(year)),
year.nv.3 = as.integer(as.factor(year)),
source = 2)
= list(list(intercept2 = rep(1, dim(nrevss_full)[1])), # custom intercept
nv.lst list(l.pop.nv = nrevss_full[,"l.pop.nv"],
en_est.s = nrevss_full[,"en_est.s"],
year.nv = nrevss_full[,"year.nv"],
year.nv.1 = nrevss_full[,"year.nv.1"],
year.nv.2 = nrevss_full[,"year.nv.2"],
year.nv.3 = nrevss_full[,"year.nv.3"],
ts_week = nrevss_full[,"ts_week.1.nv"],
ts_week.fs = nrevss_full[,"ts_week.1.nv"],
ts_week.iid = nrevss_full[,"ts_week.1.nv"],
ts_week.1.nv = nrevss_full[,"ts_week.1.nv"],
ts_week.2.nv = nrevss_full[,"ts_week.2.nv"],
ts_week.3.nv = nrevss_full[,"ts_week.3.nv"],
Region = nrevss_full[,"Region.1.nv"],
Region.1 = nrevss_full[,"Region.1.nv"],
Region.2 = nrevss_full[,"Region.1.nv"],
Region.cross = nrevss_full[,"Region.1.nv"],
Region.1.nv = nrevss_full[,"Region.1.nv"],
Region.2.nv = nrevss_full[,"Region.2.nv"],
Region.3.nv = nrevss_full[,"Region.3.nv"],
Region.4.nv = nrevss_full[,"Region.4.nv"],
st_int = nrevss_full[,"st_int"],
st_int.nv = nrevss_full[,"st_int"],
state.nv = nrevss_full[,"state.nv"],
dsource = nrevss_full[,"source"]))
#NREVSS DataStack
= inla.stack(data = list(Y = cbind(NA, nrevss_full$y_nrvs, NA)), #NA in the first and last columns (for ILI and HHS responses)
nv.stk A = list(1,1),
effects = nv.lst,
tag = "nrvs")
HHS Set
Much the same as the ILI and NREVVS DataStacks.
Hide code
<- as.data.frame(flu_HHS_full) %>%
flu_HHS_full mutate(y_hhs = log(hosp_inc+0.0001), #log hospital incidence (response variable)
l.pop.hhs = log(population),
Region.1.hhs = Region,
Region.2.hhs = Region,
Region.3.hhs = Region,
Region.4.hhs = Region,
ts_week.1.hhs = ts_week,
ts_week.2.hhs = ts_week,
ts_week.3.hhs = ts_week,
ts_week.4.hhs = ts_week,
state.hhs = abbreviation,
year.hhs = as.integer(as.factor(year)),
year.hhs.1 = as.integer(as.factor(year)),
year.hhs.2 = as.integer(as.factor(year)),
year.hhs.3 = as.integer(as.factor(year)),
source = 3)
= list(list(intercept3 = rep(1, dim(flu_HHS_full)[1])),
hhs.lst list(l.pop.hhs = flu_HHS_full[,"l.pop.hhs"],
en_est.s = flu_HHS_full[,"en_est.s"],
year.hhs = flu_HHS_full[,"year.hhs"],
year.hhs.1 = flu_HHS_full[,"year.hhs"],
year.hhs.2 = flu_HHS_full[,"year.hhs"],
year.hhs.3 = flu_HHS_full[,"year.hhs"],
ts_week.fs = flu_HHS_full[,"ts_week.1.hhs"],
ts_week.c = flu_HHS_full[,"ts_week.1.hhs"],
ts_week.iid.c = flu_HHS_full[,"ts_week.1.hhs"],
ts_week.1.hhs = flu_HHS_full[,"ts_week.1.hhs"],
ts_week.2.hhs = flu_HHS_full[,"ts_week.2.hhs"],
ts_week.3.hhs = flu_HHS_full[,"ts_week.3.hhs"],
Region = flu_HHS_full[,"Region.1.hhs"],
Region.c = flu_HHS_full[,"Region.1.hhs"],
Region.1.c = flu_HHS_full[,"Region.1.hhs"],
Region.2.c = flu_HHS_full[,"Region.1.hhs"],
Region.1.hhs = flu_HHS_full[,"Region.1.hhs"],
Region.2.hhs = flu_HHS_full[,"Region.2.hhs"],
Region.3.hhs = flu_HHS_full[,"Region.3.hhs"],
Region.4.hhs = flu_HHS_full[,"Region.4.hhs"],
state.hhs = flu_HHS_full[,"state.hhs"],
st_int.c = flu_HHS_full[,"st_int"],
dsource = flu_HHS_full[,"source"]))
# HHS DataStack
= inla.stack(data = list(Y = cbind(NA, NA, flu_HHS_full$y_hhs)), #NAs for ILI and NREVESS responses, HHS in 3rd spot
hhs.stk A = list(1,1),
effects = hhs.lst,
tag = "hhs")
Join Datastacks
Joining all three above DataStacks to a common object, joint DataStack. This is all the data needed for running the model.
Hide code
<- inla.stack(ili.stk, nv.stk, hhs.stk) joint_stack