lambdaEst {BookEKM}    R Documentation

Linkage disequilibrium: Lambda estimate

Description

The function wraps and extends the code of Example 7.4 in the book by Egeland, Kling and Mostad (EKM, 2016), providing solutions to an exercise formulated by Petter Mostad.

Usage

lambdaEst(P, data, pl = TRUE, first = log(0.01), last = log(10000), ngrid = 1000)

Arguments

P

Matrix (or array, if more than two markers) of probabilities corresponding to linkage equilibrium (LE); denoted F rather than P in Example 7.4.

data

Data or pseudocounts, non-negative integers, in the same format as P above.

pl

Logical. If TRUE, a plot is produced.

first

Real number. Log of the lower end of the search grid for lambda.

last

Real number. Log of the upper end of the search grid for lambda.

ngrid

Number of points for which the log likelihood is calculated

Details

The log likelihood is evaluated on a grid of ngrid values of lambda, equally spaced on the log scale between exp(first) and exp(last), and the maximum is found by brute force.
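
In the simulation model of the Examples below, the counts are multinomial with cell probabilities drawn from a Dirichlet(lambda*P) prior, so the marginal likelihood of lambda is Dirichlet-multinomial. Assuming that likelihood, the brute force grid search can be sketched as follows; lambdaEstSketch is a hypothetical helper for illustration only and need not match the packaged lambdaEst exactly.

# A minimal sketch of the grid search, assuming a Dirichlet-multinomial
# marginal likelihood; lambdaEstSketch is illustrative, not the package code.
lambdaEstSketch <- function(P, data, first=log(0.01), last=log(10000), ngrid=1000) {
    lambda.vec <- exp(seq(first, last, length.out=ngrid))  # grid on the log scale
    N <- sum(data)
    # log P(data | lambda), up to an additive constant not depending on lambda
    loglik <- sapply(lambda.vec, function(lambda)
        lgamma(lambda) - lgamma(lambda + N) +
            sum(lgamma(lambda*P + data) - lgamma(lambda*P)))
    list(lambda.est=lambda.vec[which.max(loglik)], loglik=loglik)
}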

Value

A list with components lambda.est, the estimate of lambda, and loglik, the log likelihood evaluated at the grid points (both components are used in the Examples below).

Author(s)

Thore Egeland <Thore.Egeland@gmail.com>

References

Egeland T, Kling D, Mostad P (2016). Relationship Inference with Familias and R: Statistical Methods in Forensic Genetics. Academic Press.

Examples

### Example 1 (Example 7.4 in the book)
freq1 <- c(0.1, 0.3, 0.6)
freq2 <- c(0.1, 0.2, 0.3, 0.4)
P <- outer(freq1,freq2) # same as P <- freq1%o%freq2
data1 <- rbind(c(1,2,3,5),
               c(0,5,9,11),
               c(6,12,20,26))
res1 <- lambdaEst(P,data1)
#Some LD introduced
data2 <- data1
data2[2:3,3:4] <- data2[2:3,3:4]+ c(10,-10,-10,10)
res2 <- lambdaEst(P,data2)

### Example 2. Solution to the exercise, part 1.
# First some functions are defined to sample from the Dirichlet prior
# and to simulate the database.
rdirichlet <- function(alpha) {
    v <- rgamma(length(alpha), alpha)
    v/sum(v)
}
rprior <- function(P, lambda) {
    array(rdirichlet(lambda*P), dim(P))
}

rdatabase <- function(N, P, lambda) {
    array(rmultinom(1, N, rprior(P, lambda)), dim(P))
}
# The next function solves parts 1, 2 and 3 of the exercise mentioned above.

lambdaSim <- function(P, lambda, N=1000, nsim=100, first=log(0.001),
    last=log(10000), ngrid=1000, pl1=FALSE, pl2=TRUE){
    lambda.vec <- exp(seq(first, last, length.out=ngrid))
    res <- NULL
    loglik <- 0
    dat <- vector("list", nsim)
    # Simulate nsim databases, estimate lambda for each and
    # accumulate the log likelihoods over the grid
    for (i in 1:nsim){
        dat[[i]] <- rdatabase(N, P, lambda)
        foo <- lambdaEst(P, dat[[i]], pl=FALSE, first=first, last=last, ngrid=ngrid)
        res <- c(res, foo$lambda.est)
        loglik <- loglik + foo$loglik
    }
    # Estimate based on the combined log likelihood
    est <- lambda.vec[which.max(loglik)]
    if (pl1) plot(density(res, from=0),
        main=paste("lambda: ", lambda, "est: ", round(est, 3)))
    if (pl2) plot(lambda.vec, loglik, type="l",
        xlab="lambda", ylab="log likelihood")
    list(allLambda=res, est=est, simData=dat)
}

# The figure for part 1 of the exercise:
freq1 <- c(0.1, 0.3, 0.6)
freq2 <- c(0.1, 0.2, 0.3, 0.4)
P <- outer(freq1,freq2)
set.seed(17)
res <- lambdaSim(P, lambda=30, last =log(1000), nsim=1, pl2 = TRUE)
abline(v=30)
# The estimate, part 2
res$est
# Simulation with known lambda and estimation. Figure 2
par(mfcol=c(3,1))
data.frame(lambda0.1 = lambdaSim(P, lambda=0.1,pl1=TRUE, pl2=FALSE, last=log(1000))$est,
           lambda1   = lambdaSim(P, lambda=1,pl1=TRUE, pl2=FALSE,last=log(1000))$est,
           lambda100 = lambdaSim(P, lambda=100,pl1=TRUE, pl2=FALSE,last=log(1000))$est)
par(mfcol=c(1,1))

# Example 3. Estimation of lambda. Three markers
freq1 <- c(0.4,0.6)
freq2 <- c(0.2,0.8)
freq3 <- c(0.3,0.7)
P <- outer(outer(freq1,freq2),freq3) #P <- freq1 %o% freq2 %o% freq3
set.seed(17)
par(mfcol=c(3,1))
data.frame(lambda0.1 = lambdaSim(P, lambda=0.1)$est,
           lambda1   = lambdaSim(P, lambda=1)$est,
           lambda100 = lambdaSim(P, lambda=100)$est)
par(mfcol=c(1,1))

# Frequencies from Exercise 4.17
freq1 <- c(0.019,0.104,0.877)
freq2 <- c(0.057,0.066,0.877)
freq3 <- c(0.354, 0.316,0.33)
P <- outer(outer(freq1,freq2),freq3) #P <- freq1 %o% freq2 %o% freq3
set.seed(17)
par(mfcol=c(3,1))
data.frame(lambda0.1 = lambdaSim(P, lambda=0.1)$est,
           lambda1   = lambdaSim(P, lambda=1)$est,
           lambda100 = lambdaSim(P, lambda=100)$est)
par(mfcol=c(1,1))

#Data from Exercise 4.17, Cluster1
dat <- array(0,dim(P))
dat[2,1,1] <- 1; dat[2,2,1] <- 1
dat[2,1,2] <- 1; dat[2,2,2] <- 2
dat[2,3,3] <- 207
res <- lambdaEst(P,dat)$lambda.est

### Example 4. Part 4 of the exercise
plotLD <- function (f1, f2, first =log(0.01), last=log(10000))
{
   n1 <- length(f1)
   n2 <- length(f2)
   N1 <- 100
   N2 <- 1000
   P <- outer(f1,f2)
   lambda <- exp(seq(first, last, length.out=N1))
   LD <- array(0, c(N1, n1, n2))
   for (i in 1:N1)
      for (j in 1:N2)
         LD[i,,] <- LD[i,,] + abs(rprior(P, lambda[i])-P)
   LD <- LD/N2
   if(is.na(max(LD))) upper <- 1 else upper <- max(LD)
   plot(lambda, seq(0, upper, length.out=N1), type = "n", log="x",
      ylab = "average absolute LD")
   matlines(lambda, matrix(LD, N1, n1*n2))
}
set.seed(17)
freq1 <- c(0.4,0.6)
freq2 <- c(0.2,0.8)
plotLD(freq1,freq2)
legend("topright",c("haplotype 1-1","haplotype 2-1","haplotype 1-2","haplotype 2-2"),lty=1:4,col=1:4)

#Example 5
## Not run: 
# We first simulate 5 data sets, each with 1000 haplotypes, with known lambda
# (the functions defined above must be available).
freq1 <- c(0.1, 0.3, 0.6)
freq2 <- c(0.1, 0.2, 0.3, 0.4)
P <- outer(freq1,freq2)
set.seed(17)
res <- lambdaSim(P, lambda=30, last =log(1000), nsim=5, pl2 = TRUE)$simData

# The function below writes haplo1.txt, ..., haplo5.txt, which can be read
# into FamLinkX to check how close the estimated lambda is to the original value.
# The sav file must be consistent with the above specification.

convertHaplo <- function(dat) {
    ndim <- dim(dat[[1]])
    nHaplo <- prod(ndim)
    col1 <- 1:nHaplo
    files <- vector("list", length(dat))
    # One data frame and one tab separated file per simulated data set
    for (i in 1:length(dat)) {
        col2 <- c(dat[[i]])
        col3 <- rep(1:ndim[1], ndim[2])
        col4 <- rep(1:ndim[2], each=ndim[1])
        files[[i]] <- data.frame(HaplotypeID=col1, Number=col2, L1=col3, L2=col4)
        write.table(files[[i]], file=paste("haplo", i, ".txt", sep=""),
                    quote=FALSE, row.names=FALSE, sep="\t")
    }
    invisible(files)
}
convertHaplo(res)

## End(Not run)
