lambdaEst {BookEKM}		R Documentation
Description

The function wraps up and extends the code in Example 7.4 of the book EKM (2016), providing solutions to an exercise formulated by Petter Mostad.
Usage

lambdaEst(P, data, pl = TRUE, first = log(0.01), last = log(10000), ngrid = 1000)
Arguments

P | Matrix or array (if more than two markers) of haplotype probabilities assuming linkage equilibrium, LE (F rather than P is used in Example 7.4).
data | Matrix or array of non-negative integer counts (or pseudocounts) in the same format as P.
pl | Logical. The log likelihood is plotted if TRUE.
first | Real number. Log of the lower bound for the lambda estimate.
last | Real number. Log of the upper bound for the lambda estimate.
ngrid | Number of grid points at which the log likelihood is evaluated.
Details

The maximum of the likelihood is found by brute force: the log likelihood is evaluated at ngrid values of lambda spaced evenly on the log scale between exp(first) and exp(last), and the grid value maximising it is returned.
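The sketch below illustrates such a grid search and how the arguments first, last and ngrid define the candidate values. It assumes the marginal Dirichlet-multinomial likelihood of Example 7.4, i.e. counts ~ Multinomial(N, p) with p ~ Dirichlet(lambda * P); the name lambdaEstSketch is hypothetical and the code is an illustration under that assumption, not the package implementation.

## Minimal sketch of a brute-force estimator (assumes a Dirichlet-multinomial
## marginal likelihood; not necessarily the exact internal code of lambdaEst).
lambdaEstSketch <- function(P, data, first = log(0.01), last = log(10000), ngrid = 1000) {
  n <- c(data)  # observed haplotype counts
  p <- c(P)     # haplotype probabilities under LE
  N <- sum(n)
  lambda.vec <- exp(seq(first, last, length.out = ngrid))  # geometric grid
  # Dirichlet-multinomial log likelihood; terms free of lambda are dropped
  loglik <- sapply(lambda.vec, function(lambda)
    lgamma(lambda) - lgamma(N + lambda) +
      sum(lgamma(n + lambda * p) - lgamma(lambda * p)))
  list(lambda.est = lambda.vec[which.max(loglik)], loglik = loglik)
}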
Value

A list with components lambda.est (the lambda estimate maximising the log likelihood) and loglik (the log likelihood evaluated at the ngrid grid points), as used in the examples below.
Author(s)

Thore Egeland <Thore.Egeland@gmail.com>
References

Egeland, T., Kling, D. and Mostad, P. (2016). Relationship Inference with Familias and R: Statistical Methods in Forensic Genetics. Academic Press.
Examples

### Example 1 (Example 7.4 in the book)
freq1 <- c(0.1, 0.3, 0.6)
freq2 <- c(0.1, 0.2, 0.3, 0.4)
P <- outer(freq1, freq2) # same as P <- freq1 %o% freq2
data1 <- rbind(c(1, 2, 3, 5),
               c(0, 5, 9, 11),
               c(6, 12, 20, 26))
res1 <- lambdaEst(P, data1)

# Some LD introduced
data2 <- data1
data2[2:3, 3:4] <- data2[2:3, 3:4] + c(10, -10, -10, 10)
res2 <- lambdaEst(P, data2)

### Example 2. Solution to the exercise, part 1.
# First some functions are defined to sample from the Dirichlet prior
# and simulate the database.
rdirichlet <- function(alpha) {
  v <- rgamma(length(alpha), alpha)
  v / sum(v)
}
rprior <- function(P, lambda) {
  array(rdirichlet(lambda * P), dim(P))
}
rdatabase <- function(N, P, lambda) {
  array(rmultinom(1, N, rprior(P, lambda)), dim(P))
}

# The next function solves parts 1, 2 and 3 of the mentioned exercise.
lambdaSim <- function(P, lambda, N = 1000, nsim = 100, first = log(0.001),
                      last = log(10000), ngrid = 1000, pl1 = FALSE, pl2 = TRUE) {
  lambda.vec <- exp(seq(first, last, length.out = ngrid))
  res <- NULL
  loglik <- 0
  dat <- vector("list", nsim)
  for (i in 1:nsim) {
    dat[[i]] <- rdatabase(N, P, lambda)
    foo <- lambdaEst(P, dat[[i]], pl = FALSE, first = first, last = last, ngrid = ngrid)
    res <- c(res, foo$lambda.est)
    loglik <- loglik + foo$loglik
  }
  est <- lambda.vec[which.max(loglik)]
  if (pl1)
    plot(density(res, from = 0), main = paste("lambda: ", lambda, "est: ", round(est, 3)))
  if (pl2)
    plot(lambda.vec, loglik, type = "l", xlab = "lambda", ylab = "log likelihood")
  list(allLambda = res, est = est, simData = dat)
}

# The figure for part 1 of the exercise:
freq1 <- c(0.1, 0.3, 0.6)
freq2 <- c(0.1, 0.2, 0.3, 0.4)
P <- outer(freq1, freq2)
set.seed(17)
res <- lambdaSim(P, lambda = 30, last = log(1000), nsim = 1, pl2 = TRUE)
abline(v = 30)

# The estimate, part 2
res$est

# Simulation with known lambda and estimation. Figure 2
par(mfcol = c(3, 1))
data.frame(lambda0.1 = lambdaSim(P, lambda = 0.1, pl1 = TRUE, pl2 = FALSE, last = log(1000))$est,
           lambda1   = lambdaSim(P, lambda = 1,   pl1 = TRUE, pl2 = FALSE, last = log(1000))$est,
           lambda100 = lambdaSim(P, lambda = 100, pl1 = TRUE, pl2 = FALSE, last = log(1000))$est)
par(mfcol = c(1, 1))

### Example 3. Estimation of lambda. Three markers
freq1 <- c(0.4, 0.6)
freq2 <- c(0.2, 0.8)
freq3 <- c(0.3, 0.7)
P <- outer(outer(freq1, freq2), freq3) # P <- freq1 %o% freq2 %o% freq3
set.seed(17)
par(mfcol = c(3, 1))
data.frame(lambda0.1 = lambdaSim(P, lambda = 0.1)$est,
           lambda1   = lambdaSim(P, lambda = 1)$est,
           lambda100 = lambdaSim(P, lambda = 100)$est)
par(mfcol = c(1, 1))

# Frequencies from Exercise 4.17
freq1 <- c(0.019, 0.104, 0.877)
freq2 <- c(0.057, 0.066, 0.877)
freq3 <- c(0.354, 0.316, 0.33)
P <- outer(outer(freq1, freq2), freq3) # P <- freq1 %o% freq2 %o% freq3
set.seed(17)
par(mfcol = c(3, 1))
data.frame(lambda0.1 = lambdaSim(P, lambda = 0.1)$est,
           lambda1   = lambdaSim(P, lambda = 1)$est,
           lambda100 = lambdaSim(P, lambda = 100)$est)
par(mfcol = c(1, 1))

# Data from Exercise 4.17, Cluster 1
dat <- array(0, dim(P))
dat[2, 1, 1] <- 1; dat[2, 2, 1] <- 1
dat[2, 1, 2] <- 1; dat[2, 2, 2] <- 2
dat[2, 3, 3] <- 207
res <- lambdaEst(P, dat)$lambda.est

### Example 4. Part 4 of the exercise
plotLD <- function(f1, f2, first = log(0.01), last = log(10000)) {
  n1 <- length(f1)
  n2 <- length(f2)
  N1 <- 100
  N2 <- 1000
  P <- outer(f1, f2)
  lambda <- exp(seq(first, last, length.out = N1))
  LD <- array(0, c(N1, n1, n2))
  for (i in 1:N1)
    for (j in 1:N2)
      LD[i, , ] <- LD[i, , ] + abs(rprior(P, lambda[i]) - P)
  LD <- LD / N2
  if (is.na(max(LD))) upper <- 1 else upper <- max(LD)
  plot(lambda, seq(0, upper, length.out = N1), type = "n", log = "x",
       ylab = "average absolute LD")
  matlines(lambda, matrix(LD, N1, n1 * n2))
}
set.seed(17)
freq1 <- c(0.4, 0.6)
freq2 <- c(0.2, 0.8)
plotLD(freq1, freq2)
legend("topright",
       c("haplotype 1-1", "haplotype 2-1", "haplotype 1-2", "haplotype 2-2"),
       lty = 1:4, col = 1:4)

### Example 5
## Not run: 
# We first simulate 5 data sets (the lambdaSim function above must be defined),
# each with 1000 haplotypes, with known lambda.
freq1 <- c(0.1, 0.3, 0.6)
freq2 <- c(0.1, 0.2, 0.3, 0.4)
P <- outer(freq1, freq2)
set.seed(17)
res <- lambdaSim(P, lambda = 30, last = log(1000), nsim = 5, pl2 = TRUE)$simData

# The function below writes haplo1.txt, ..., haplo5.txt, which can be read into
# FamLinkX to see how close the estimated lambda is to the original value.
# The sav file must be consistent with the above specification.
convertHaplo <- function(dat) {
  ndim <- dim(dat[[1]])
  nHaplo <- prod(ndim)
  col1 <- 1:nHaplo
  files <- vector("list", length(dat))
  for (i in 1:length(dat)) {
    col2 <- c(dat[[i]])
    col3 <- rep(1:ndim[1], ndim[2])
    col4 <- rep(1:ndim[2], each = ndim[1])
    files[[i]] <- data.frame(HaplotypeID = col1, Number = col2, L1 = col3, L2 = col4)
    write.table(files[[i]], file = paste("haplo", i, ".txt", sep = ""),
                quote = FALSE, row.names = FALSE, sep = "\t")
  }
  0
}
convertHaplo(res)
## End(Not run)