Data Organization

Formatting data as list() objects and DataStacks for model input.

Overview

Organizing data for a three-part joint model can be complex. This script walks through DataStack construction where a DataStack is a list object that holds all data for model fitting. Because this is a three-part model, DataStacks will be created for each submodel (e.g., an ILI submodel, a NREVSS submodel, and a HHS submodel) then the three submodel DataStacks will be combined as a joint_stack. As part of DataStack construction, a three-part response/dependent variable will be specified as a three-column matrix.

Note: This script requires that data Preprocessing has already been completed and all Preprocessing objects are available in the working environment.

ILI DataStack

The ILI model will occupy the base level of the joint model and pass information to both the middle level submodel (NREVSS) and the top level HHS submodel.

Hide code
ilinet_full <- as.data.frame(ilinet_full) %>%
  mutate(y_ili = unweighted.s, #Response variable, logit transformed percentage of influenza-like illnesses.
         l.pop.ili = log(population), #log population size
         Region.1.ili = Region, #an identifier for each location (i.e., States and Territories)
         Region.2.ili = Region,
         Region.3.ili = Region,
         Region.4.ili = Region,
         ts_week.1.ili = ts_week, #integer time step for epiweeks 2010 to 2023-epiweek-22
         ts_week.2.ili = ts_week,
         ts_week.3.ili = ts_week,
         ts_week.4.ili = ts_week,
         state.ili = abbreviation, #State abbreviations
         year.ili = as.integer(as.factor(year)), #integer time step for years
         year.ili.1 = as.integer(as.factor(year)),
         year.ili.2 = as.integer(as.factor(year)),
         year.ili.3 = as.integer(as.factor(year)),
         source = 1) #just an ID to identify ILI data



ili.lst = list(list(intercept1 = rep(1, dim(ilinet_full)[1])), #custom intercept
          list(l.pop.ili = ilinet_full[,"l.pop.ili"],  #log-population              
               year.ili = ilinet_full[,"year.ili"],
               en_est.s = ilinet_full[,"en_est.s"], #amplitude of temporal trend in FluSurv cases
               year.ili.1 = ilinet_full[,"year.ili.1"],
               year.ili.2 = ilinet_full[,"year.ili.2"],
               year.ili.3 = ilinet_full[,"year.ili.3"],
               ts_week = ilinet_full[,"ts_week.1.ili"],
               ts_week.fs = ilinet_full[,"ts_week.1.ili"],
               ts_week.iid = ilinet_full[,"ts_week.1.ili"],
               ts_week.1.ili = ilinet_full[,"ts_week.1.ili"],
               ts_week.2.ili = ilinet_full[,"ts_week.2.ili"],
               ts_week.3.ili = ilinet_full[,"ts_week.3.ili"],
               Region = ilinet_full[,"Region.1.ili"],
               Region.1 = ilinet_full[,"Region.1.ili"],
               Region.2 = ilinet_full[,"Region.1.ili"],
               Region.cross = ilinet_full[,"Region.1.ili"],
               Region.1.ili = ilinet_full[,"Region.1.ili"],
               Region.2.ili = ilinet_full[,"Region.2.ili"],
               Region.3.ili = ilinet_full[,"Region.3.ili"],
               Region.4.ili = ilinet_full[,"Region.4.ili"],
               st_int.ili = ilinet_full[,"st_int"], #State-Time interactions
               state.ili = ilinet_full[,"state.ili"],
               dsource = ilinet_full[,"source"]))

# The ILI DataStack
ili.stk = inla.stack(data = list(Y = cbind(ilinet_full$y_ili, NA, NA)), #base level response in 1st slot, NAs are placeholders for other two
                                      A = list(1,1),  #optional matrix object, not used in this analysis     
                                effects = ili.lst,    # list object created above with indices and variables   
                                    tag = "ili")      #arbitrary name/label to pull data later

NREVSS Set

Similar to ILI DataStack above, creating one for the NREVSS submodel. Note that some variable names are the same as in the ILI DataStack (e.g., Region and st_int) BUT some are specific to this DataStack with a .nv designation: If a variable name is included in the joint model formula, it can only contribute to the submodels that also have it. This provides control such that some variables may be specific to one submodel or one DataStack, whereas other variables may be applicable to all submodels concurrently.

Hide code
nrevss_full <- as.data.frame(nrevss_full) %>%
  mutate(y_nrvs = Apos.s, #confirmed positive influenza A, logit transformed percentage
         l.pop.nv = log(population),
         Region.1.nv = Region,
         Region.2.nv = Region,
         Region.3.nv = Region,
         Region.4.nv = Region,
         ts_week.1.nv = ts_week,
         ts_week.2.nv = ts_week,
         ts_week.3.nv = ts_week,
         ts_week.4.nv = ts_week,
         state.nv = abbreviation,
         year.nv = as.integer(as.factor(year)),
         year.nv.1 = as.integer(as.factor(year)),
         year.nv.2 = as.integer(as.factor(year)),
         year.nv.3 = as.integer(as.factor(year)),
         source = 2)

nv.lst = list(list(intercept2 = rep(1, dim(nrevss_full)[1])), # custom intercept
          list(l.pop.nv = nrevss_full[,"l.pop.nv"], 
               en_est.s = nrevss_full[,"en_est.s"],
               year.nv = nrevss_full[,"year.nv"],
               year.nv.1 = nrevss_full[,"year.nv.1"],
               year.nv.2 = nrevss_full[,"year.nv.2"],
               year.nv.3 = nrevss_full[,"year.nv.3"],
               ts_week = nrevss_full[,"ts_week.1.nv"],
               ts_week.fs = nrevss_full[,"ts_week.1.nv"],
               ts_week.iid = nrevss_full[,"ts_week.1.nv"],
               ts_week.1.nv = nrevss_full[,"ts_week.1.nv"],
               ts_week.2.nv = nrevss_full[,"ts_week.2.nv"],
               ts_week.3.nv = nrevss_full[,"ts_week.3.nv"],
               Region = nrevss_full[,"Region.1.nv"],
               Region.1 = nrevss_full[,"Region.1.nv"],
               Region.2 = nrevss_full[,"Region.1.nv"],
               Region.cross = nrevss_full[,"Region.1.nv"],
               Region.1.nv = nrevss_full[,"Region.1.nv"],
               Region.2.nv = nrevss_full[,"Region.2.nv"],
               Region.3.nv = nrevss_full[,"Region.3.nv"],
               Region.4.nv = nrevss_full[,"Region.4.nv"],
               st_int = nrevss_full[,"st_int"],
               st_int.nv = nrevss_full[,"st_int"],
               state.nv = nrevss_full[,"state.nv"],
               dsource = nrevss_full[,"source"]))


#NREVSS DataStack
nv.stk = inla.stack(data = list(Y = cbind(NA, nrevss_full$y_nrvs, NA)), #NA in the first and last columns (for ILI and HHS responses)
                                      A = list(1,1),       
                                effects = nv.lst,        
                                    tag = "nrvs")

HHS Set

Much the same as the ILI and NREVVS DataStacks.

Hide code
flu_HHS_full <- as.data.frame(flu_HHS_full) %>%
  mutate(y_hhs = log(hosp_inc+0.0001), #log hospital incidence (response variable)
         l.pop.hhs = log(population),
         Region.1.hhs = Region,
         Region.2.hhs = Region,
         Region.3.hhs = Region,
         Region.4.hhs = Region,
         ts_week.1.hhs = ts_week,
         ts_week.2.hhs = ts_week,
         ts_week.3.hhs = ts_week,
         ts_week.4.hhs = ts_week,
         state.hhs = abbreviation,
         year.hhs = as.integer(as.factor(year)),
         year.hhs.1 = as.integer(as.factor(year)),
         year.hhs.2 = as.integer(as.factor(year)),
         year.hhs.3 = as.integer(as.factor(year)),
         source = 3)

hhs.lst = list(list(intercept3 = rep(1, dim(flu_HHS_full)[1])), 
          list(l.pop.hhs = flu_HHS_full[,"l.pop.hhs"], 
               en_est.s = flu_HHS_full[,"en_est.s"],
               year.hhs = flu_HHS_full[,"year.hhs"],
               year.hhs.1 = flu_HHS_full[,"year.hhs"],
               year.hhs.2 = flu_HHS_full[,"year.hhs"],
               year.hhs.3 = flu_HHS_full[,"year.hhs"],
               ts_week.fs = flu_HHS_full[,"ts_week.1.hhs"],
               ts_week.c = flu_HHS_full[,"ts_week.1.hhs"],
               ts_week.iid.c = flu_HHS_full[,"ts_week.1.hhs"],
               ts_week.1.hhs = flu_HHS_full[,"ts_week.1.hhs"],
               ts_week.2.hhs = flu_HHS_full[,"ts_week.2.hhs"],
               ts_week.3.hhs = flu_HHS_full[,"ts_week.3.hhs"],
               Region = flu_HHS_full[,"Region.1.hhs"],
               Region.c = flu_HHS_full[,"Region.1.hhs"],
               Region.1.c = flu_HHS_full[,"Region.1.hhs"],
               Region.2.c = flu_HHS_full[,"Region.1.hhs"],
               Region.1.hhs = flu_HHS_full[,"Region.1.hhs"],
               Region.2.hhs = flu_HHS_full[,"Region.2.hhs"],
               Region.3.hhs = flu_HHS_full[,"Region.3.hhs"],
               Region.4.hhs = flu_HHS_full[,"Region.4.hhs"],
               state.hhs = flu_HHS_full[,"state.hhs"],
               st_int.c = flu_HHS_full[,"st_int"],
               dsource = flu_HHS_full[,"source"]))

# HHS DataStack  
hhs.stk = inla.stack(data = list(Y = cbind(NA, NA, flu_HHS_full$y_hhs)), #NAs for ILI and NREVESS responses, HHS in 3rd spot
                                      A = list(1,1),       
                                effects = hhs.lst,        
                                    tag = "hhs")   

Join Datastacks

Joining all three above DataStacks to a common object, joint DataStack. This is all the data needed for running the model.

Hide code
joint_stack <- inla.stack(ili.stk, nv.stk, hhs.stk)