ANGSD: Analysis of next generation Sequencing Data
Latest tar.gz version is (0.938/0.939 on github), see Change_log for changes, and download it here.
Inbreed
From angsd
Definition
- n
- total number of individuals
- X
- all sequencing data
- f
- allele frequency
- F
- inbreeding coefficient
- G
- genotype
total likelihood
suggestion for EM
based on
where
getLikes<-function(x,d=5,e=0.01,norm=FALSE){
n<-length(x)
dep<-rpois(n,d)
nA<-rbinom(n,dep,c(e,0.5,1-e)[x+1])
res<-rbind(dbinom(nA,dep,e),dbinom(nA,dep,0.5),dbinom(nA,dep,1-e))
if(norm)
res<-t(t(res)/colSums(res))
res
}
MLoptim<-function(like){
getLike<-function(x,like){
freq<-x[1]
F<-x[2]
-sum(log(
like[1,]*((1-freq)^2+freq*(1-freq)*F)
+like[2,]*(2*freq*(1-freq)-2*freq*(1-freq)*F)
+like[3,]*(freq^2+freq*(1-freq)*F)
))
}
optim(c(0.1,0.1),getLike,like=like)
}
squaremMy<-
function (par, emStep, likeFun, ...)
{
maxiter <- 1500
tol = 1e-07
step.min <- 1
step.max0 <- 1
step.max <- 1
mstep <- 4
iter <- 1
objval <- rep(NA, 1)
p <- par
lold <- likeFun(p, ...)
leval <- 1
feval <- 0
conv <- TRUE
while (feval < maxiter) {
extrap <- TRUE
p1 <- emStep(p, ...)
feval <- feval + 1
q1 <- p1 - p
sr2 <- crossprod(q1)
if (sqrt(sr2) < tol)
break
p2 <- emStep(p1, ...)
feval <- feval + 1
q2 <- p2 - p1
sq2 <- sqrt(crossprod(q2))
if (sq2 < tol)
break
sv2 <- crossprod(q2 - q1)
srv <- crossprod(q1, q2 - q1)
alpha <- sqrt(sr2/sv2)
alpha <- max(step.min, min(step.max, alpha))
p.new <- p + 2 * alpha * q1 + alpha^2 * (q2 - q1)
if (abs(alpha - 1) > 0.01) {
p.new <- try(emStep(p.new, ...), silent = TRUE)
feval <- feval + 1
}
lnew <- lold
if (alpha == step.max)
step.max <- mstep * step.max
if (step.min < 0 & alpha == step.min)
step.min <- mstep * step.min
p <- p.new
iter <- iter + 1
}
return(list(par = p, value.likeFun = lold, iter = iter, fpevals = feval,
objfevals = leval, convergence = conv))
}
MLEM<-function(like,iter=100){
getLike<-function(x,like){
freq<-x[1]
F<-x[2]
-sum(log(
like[1,]*((1-freq)^2+freq*(1-freq)*F)
+like[2,]*(2*freq*(1-freq)-2*freq*(1-freq)*F)
+like[3,]*(freq^2+freq*(1-freq)*F)
))
}
em<-function(x,like){
freq<-x[1]
F<-x[2]
n<-ncol(like)
pp<-cbind(
like[1,]*((1-freq)^2+freq*(1-freq)*F),
like[2,]*(2*freq*(1-freq)-2*freq*(1-freq)*F),
like[3,]*(freq^2+freq*(1-freq)*F)
)
pp<-pp/rowSums(pp)
f2<-sum(pp[,2]+2*pp[,3])/(2*n)
oHO<-sum(pp[,1]+pp[,3])
eHO<-n*(freq^2+(1-freq)^2)
F2<-(oHO-eHO)/(n-eHO)
c(f2,max(0,F2))
}
squaremMy(c(0.01,0.01),emStep=em,like=like,likeFun=getLike)
}
N<-5000
F<-0.1
freq<-0.2
geno<-sample(0:2,N,replace=T,prob=c((1-freq)^2+freq*(1-freq)*F,2*freq*(1-freq)-2*freq*(1-freq)*F,freq^2+freq*(1-freq)*F))
like<-getLikes(geno,d=4,e=0.01,norm=TRUE)
library(SQUAREM)
system.time(mm<-MLoptim(like))
system.time(ss<-MLEM(like)) #the one of implement