# sample size calculation for detecting interaction p12 <- c(0.55, 0.15, 0.25, 0.05) pd <- c(0.10, 0.033, 0.08, 0.50) # M is number of simulations per sample size # N is vector of sample sizes to consider M <- 2000 N <- seq(100,1000,100) # generate vector to save power estimates power <- rep(0, length(N)) for(n in 1:length(N)) { # simulate the number of people with each set of risk factors # group 1 has neither RF, group 2 has only RF1, # group 3 has only RF2 and group 4 has both RF1 and RF2 # for efficiency, we can simulate this M times in one matrix groups <- rmultinom(M, N[n], p12) for(m in 1:M) { # of those in each risk factor set, simulate disease status n1 <- rbinom(1, groups[1,m], pd[1]) n2 <- rbinom(1, groups[2,m], pd[2]) n3 <- rbinom(1, groups[3,m], pd[3]) n4 <- rbinom(1, groups[4,m], pd[4]) # generate a vector of 0's and 1's reflecting disease state in each # risk factor group y1 <- c(rep(0, groups[1,m]-n1), rep(1, n1)) y2 <- c(rep(0, groups[2,m]-n2), rep(1, n2)) y3 <- c(rep(0, groups[3,m]-n3), rep(1, n3)) y4 <- c(rep(0, groups[4,m]-n4), rep(1, n4)) y <- c(y1, y2, y3, y4) # generate indicators of RF1 and RF2 # groups 1 and 3 do not have RF1; groups 2 and 4 have RF1 x1 <- c(rep(0, groups[1,m]), rep(1, groups[2,m]), rep(0, groups[3,m]), rep(1, groups[4,m])) # groups 1 and 2 do not have RF2; groups 3 and 4 have RF2 x2 <- c(rep(0, groups[1,m]), rep(0, groups[2,m]), rep(1, groups[3,m]), rep(1, groups[4,m])) # generate interaction term: z <- x1*x2 # fit model reg <- glm(y ~ x1 + x2 + z, family=binomial) # get p-value coeftable <- summary(reg)$coefficients if(nrow(coeftable)==4) { pval <- coeftable[4,4] power[n] <- power[n] + ifelse(pval<0.05,1,0)/M } } print(n) } # make plot of sample size versus estimated power plot(N, power, type="l", lwd=2, ylim=c(0,1)) points(N, power, pch=21, bg=2) abline(h=seq(0,1,0.10), lty=3, lwd=0.5)