ANGSD: Analysis of next generation Sequencing Data

Latest tar.gz version is (0.938/0.939 on github), see Change_log for changes, and download it here.

Inbreed

From angsd
Jump to: navigation, search

Definition



\begin{align}
p(AA)&=P_A^2+p_Ap_aF \\
p(Aa)&=2P_Ap_a-2p_Ap_aF
\end{align}

n
total number of individuals
X
all sequencing data
f
allele frequency
F
inbreeding coefficient
G
genotype

total likelihood 
p(X|f,F)\sim\prod_i^np(X_i|f,F)=\prod_i^n\sum_{G\in \{0,1,2\}}p(X_i|G)p(G|f,F)

suggestion for EM


\begin{align}
f_{j+1}&=\frac{\sum_i^n p(G=1|X_i,f_j,F_j)+2\sum_i^n p(G=2|X_i,f_j,F_j)}{2n}\\
F_{j+1}&=\frac{\sum_i^n p(G=0|X_i,f_j,F_j)+\sum_i^n p(G=2|X_i,f_j,F_j)}{n-n(f_j^2-(1-f_j)^2)}\\
\end{align}

based on


\hat{F}=\frac{O(HO)-E(HO)}{n-E(HO)}

where


\begin{align}
O(HO)/n&=p(AA)+p(aa)\\
E(HO)/n&=p_a^2+p_A^2
\end{align}

getLikes<-function(x,d=5,e=0.01,norm=FALSE){
  n<-length(x)
  dep<-rpois(n,d)
  nA<-rbinom(n,dep,c(e,0.5,1-e)[x+1])
  res<-rbind(dbinom(nA,dep,e),dbinom(nA,dep,0.5),dbinom(nA,dep,1-e))
  if(norm)
    res<-t(t(res)/colSums(res))
  res
}


MLoptim<-function(like){



  getLike<-function(x,like){
    freq<-x[1]
    F<-x[2]
    -sum(log(
             like[1,]*((1-freq)^2+freq*(1-freq)*F)
             +like[2,]*(2*freq*(1-freq)-2*freq*(1-freq)*F)
             +like[3,]*(freq^2+freq*(1-freq)*F)             
            ))

  }
optim(c(0.1,0.1),getLike,like=like)

}
squaremMy<-
function (par, emStep, likeFun, ...) 
{
  maxiter <- 1500
  tol = 1e-07
  step.min <- 1
  step.max0 <- 1
  step.max <- 1
  mstep <- 4
  iter <- 1
  objval <- rep(NA, 1)
  p <- par
  lold <- likeFun(p, ...)
  leval <- 1
  feval <- 0
  conv <- TRUE
  while (feval < maxiter) {
    extrap <- TRUE
    p1 <- emStep(p, ...)
    feval <- feval + 1
    q1 <- p1 - p
    sr2 <- crossprod(q1)
    if (sqrt(sr2) < tol) 
      break
    p2 <- emStep(p1, ...)
    feval <- feval + 1
    q2 <- p2 - p1
    sq2 <- sqrt(crossprod(q2))
    if (sq2 < tol) 
      break
    sv2 <- crossprod(q2 - q1)
    srv <- crossprod(q1, q2 - q1)
    alpha <- sqrt(sr2/sv2)
    alpha <- max(step.min, min(step.max, alpha))
    p.new <- p + 2 * alpha * q1 + alpha^2 * (q2 - q1)
    if (abs(alpha - 1) > 0.01) {
      p.new <- try(emStep(p.new, ...), silent = TRUE)
      feval <- feval + 1
    }
    lnew <- lold
    if (alpha == step.max) 
      step.max <- mstep * step.max
    if (step.min < 0 & alpha == step.min) 
      step.min <- mstep * step.min
    p <- p.new
    iter <- iter + 1
  }
  return(list(par = p, value.likeFun = lold, iter = iter, fpevals = feval, 
              objfevals = leval, convergence = conv))
}

MLEM<-function(like,iter=100){



  getLike<-function(x,like){
    freq<-x[1]
    F<-x[2]
    -sum(log(
             like[1,]*((1-freq)^2+freq*(1-freq)*F)
             +like[2,]*(2*freq*(1-freq)-2*freq*(1-freq)*F)
             +like[3,]*(freq^2+freq*(1-freq)*F)             
            ))

  }
  em<-function(x,like){
    freq<-x[1]
    F<-x[2]
    n<-ncol(like)
    pp<-cbind(
              like[1,]*((1-freq)^2+freq*(1-freq)*F),
              like[2,]*(2*freq*(1-freq)-2*freq*(1-freq)*F),
              like[3,]*(freq^2+freq*(1-freq)*F)             
              )
    pp<-pp/rowSums(pp)
    f2<-sum(pp[,2]+2*pp[,3])/(2*n)
    oHO<-sum(pp[,1]+pp[,3])
    eHO<-n*(freq^2+(1-freq)^2)
    F2<-(oHO-eHO)/(n-eHO)
    c(f2,max(0,F2))
  }
  squaremMy(c(0.01,0.01),emStep=em,like=like,likeFun=getLike)

  

}



N<-5000
F<-0.1
freq<-0.2
geno<-sample(0:2,N,replace=T,prob=c((1-freq)^2+freq*(1-freq)*F,2*freq*(1-freq)-2*freq*(1-freq)*F,freq^2+freq*(1-freq)*F))
like<-getLikes(geno,d=4,e=0.01,norm=TRUE)
library(SQUAREM)

system.time(mm<-MLoptim(like))
system.time(ss<-MLEM(like)) #the one of implement