# sample size calculation for detecting interaction

p12 <- c(0.55, 0.15, 0.25, 0.05)
pd <- c(0.10, 0.033, 0.08, 0.50)

# M is number of simulations per sample size
# N is vector of sample sizes to consider

M <- 2000
N <- seq(100,1000,100)

# generate vector to save power estimates
power <- rep(0, length(N)) 

for(n in 1:length(N)) {
	# simulate the number of people with each set of risk factors
	# group 1 has neither RF, group 2 has only RF1, 
	# group 3 has only RF2 and group 4 has both RF1 and RF2
	# for efficiency, we can simulate this M times in one matrix

	groups <- rmultinom(M, N[n], p12)

	for(m in 1:M) {
		# of those in each risk factor set, simulate disease status
		n1 <- rbinom(1, groups[1,m], pd[1])
		n2 <- rbinom(1, groups[2,m], pd[2])
		n3 <- rbinom(1, groups[3,m], pd[3])
		n4 <- rbinom(1, groups[4,m], pd[4])
		
		# generate a vector of 0's and 1's reflecting disease state in each 
		# risk factor group
		y1 <- c(rep(0, groups[1,m]-n1), rep(1, n1))
		y2 <- c(rep(0, groups[2,m]-n2), rep(1, n2))
		y3 <- c(rep(0, groups[3,m]-n3), rep(1, n3))
		y4 <- c(rep(0, groups[4,m]-n4), rep(1, n4))
		y <- c(y1, y2, y3, y4)

		# generate indicators of RF1 and RF2
		# groups 1 and 3 do not have RF1; groups 2 and 4 have RF1
		x1 <- c(rep(0, groups[1,m]), rep(1, groups[2,m]),
				rep(0, groups[3,m]), rep(1, groups[4,m]))
		# groups 1 and 2 do not have RF2; groups 3 and 4 have RF2	
		x2 <- c(rep(0, groups[1,m]), rep(0, groups[2,m]),
				rep(1, groups[3,m]), rep(1, groups[4,m]))

		# generate interaction term:
		z <- x1*x2

		# fit model
		reg <- glm(y ~ x1 + x2 + z, family=binomial)
		# get p-value
		coeftable <- summary(reg)$coefficients
		if(nrow(coeftable)==4) {
			pval <- coeftable[4,4]
			power[n] <- power[n] + ifelse(pval<0.05,1,0)/M
		}
	}
	print(n)
}


# make plot of sample size versus estimated power
plot(N, power, type="l", lwd=2, ylim=c(0,1))
points(N, power, pch=21, bg=2)
abline(h=seq(0,1,0.10), lty=3, lwd=0.5)