(* Package Name: salvador.m
   Author: Qi Zheng, National Center for Toxicological Research
   Starting date: March 14, 2000 
   Major revision: March 5, 2001
   Refinement started: May 11, 2001 *)

BeginPackage["salvador`", {"Statistics`DescriptiveStatistics`",
                           "Statistics`DiscreteDistributions`",
                           "Statistics`ContinuousDistributions`",
                           "Statistics`Common`DistributionsCommon`"}]

Options[plotFit] = {logPlot -> False, DifferentialGrowth -> False, 
     dataStyle -> {RGBColor[1, 0, 0], PointSize[0.015]},
     fitStyle -> {RGBColor[0, 1, 0], Thickness[0.006]} }

pmfLD::usage="pmfLD[\!\(m,\[Phi],k\)] returns a list of probabilities
\!\({p\_0,p\_1,..., p\_k}\) according to a Luria-Delbr\!\(\[UDoubleDot]\)uck
distribution, LD(m,\!\(\[Phi]\)). Note pmfLD[m,k] is the same as pmfLD[m,1,k]."

pmfM::usage="pmfM[m,\!\(\[Rho]\),k] returns a list of probabilities
\!\({p\_0,p\_1,...,p\_k}\) according to an \!\(M(m,\[Rho],1)\) distribution;
pmfM[m,\!\(\[Rho],\[Phi]\),n] returns the same list according to an
\!\(M(m,\[Rho],\[Phi])\) distribution. Note m is the mean number of mutations,
\!\(\[Rho]=\[Beta]\_2/\[Beta]\_1\) is the ratio of mutant growth rate to nonmutant
growth rate, and \!\(\[Phi]=1-N\_0/N\_T\)."

pmfMKoch::usage="pmfMKoch[m,\!\(\[Rho]\),k] computes \!\({p\_0,p\_1,...,p\_k}\)
for an \!\(M(m,\[Rho],1)\) distribution, using Koch's algorithm. (This function
is not recommended for large k.)"

pmfPoisson::usage="pmfPoisson[\!\(\[Lambda]\),k] returns a list of probabilities
\!\({p\_0,p\_1,...,p\_k}\) according to a Poisson(\!\(\[Lambda]\)) distribution."

pmfCairns::usage="pmfCairns[\!\(m,\[Phi],\[Lambda],k\)] returns a list of
probabilities \!\({p\_0,...,p\_k}\) according to the convolution of
an LD(\!\(m,\[Phi]\)) distribution and a Poisson(\!\(\[Lambda]\)) distribution."

pmf2cdf::usage="pmf2cdf[\!\({p\_0,p\_1,...,p\_k}\)] returns a list of cumulative
probabilities according to the given list of probabilities
\!\({p\_0,p\_1,...,p\_k}\)."

simuMutants::usage="simuMutants[\!\(\[Mu],\[Beta]\_1,\[Beta]\_2, N\_0,N\_T\)]
simulates the number of mutants in a sister culture. Note \!\(\[Mu]\) is the
mutation rate per cell per unit time; \!\(\[Beta]\_1\) is growth rate for
nonmutants; \!\(\[Beta]\_2\) is growth rate of mutants; \!\(N\_0\) is the initial
population size; \!\(N\_T\) is the population size prior to plating."

MeanOfMutants::usage="MeanOfMutants[\!\(\[Mu],\[Beta]\_1,\[Beta]\_2,N\_0,N\_T\)]
computes the mean number of mutants on each sister culture."

VarOfMutants::usage="VarOfMutants[\!\(\[Mu],\[Beta]\_1,\[Beta]\_2,N\_0,N\_T\)]
computes the variance of the number of mutants on each sister culture."

mleLD::usage="mleLD[data,\!\(\[Phi]:=1\),M:=50] searches in the interval
[0.001,M] for the maximum likelihood estimate of m, the mean number of mutations,
when data is assumed to be generated by an LD(m,\!\(\[Phi]\)) distribution. Note
\!\(\[Phi]\) is considered as known." 

mleM::usage="mleM[data,M:50,R:5] searches for the maximum likelihood estimates
of both m and \!\(\[Rho]\) by assuming that the data is generated by an
M(m,\!\(\[Rho]\),1) distribution. The search is confined to
\!\(m\[Element](0.001,M)\) and \!\(\[Rho]\[Element](0.01,R)\)."

FisherInfoLD::usage="FisherInfoLD[m,\!\(\[Phi]\),k] computes the Fisher
information for m according to an LD(m,\!\(\[Phi]\)) distribution; it retains
k terms in the series \!\(I(m)=\[Sum] f\^2\%j/p\_j\)."

FisherInfoM::usage="FisherInfoM[m,\!\(\[Rho],\[Phi]\),k] computes the Fisher
information for m, I(m), according to an M(m,\!\(\[Rho],\[Phi]\)) distribution;
it retains k terms in the series \!\(I(m)=\[Sum] f\^2\%j/p\_j\). Note that
FisherInfoM[m,\!\(\[Rho]\),k] assumes \!\(\[Phi]=1\)."

FisherInfoRho::usage="FisherInfoRho[m,\!\(\[Rho]\),k] computes the
Fisher information for \!\(\[Rho]\), I(\!\(\[Rho]\)), using an
M(m, \!\(\[Rho],1\)) distribution. It retains k terms in the series
\!\(I(\!\(\[Rho]\))=\[Sum] f\^2\%j/p\_j\)."

FisherInfoMLea::usage="FisherInfoMLea[m,\!\(\[Rho]\),k] computes the Fisher
information for both m and \!\(\[Rho]\) according to an M(m,\!\(\[Rho]\),1)
distribution by the algorithm proposed by Lea and Coulson. It uses
k terms to approximate I(m) and I(\!\(\[Rho]\)). (This function is not
recommended for large k.)"

MethodOfP0::usage="MethodOfP0[data] estimates m, the expected number of mutations,
using the \!\(P\_0\) method."

MethodOfMeans::usage="MethodOfMeans[data,\!\(w\_0:=10\)] estimates m (the mean
number of mutations) by the method of means as proposed by Luria and
Delbr\!\(\[UDoubleDot]\)ck. Note \!\(w\_0\) is an initial value needed
for numerically solving a transcendental equation."

MethodOfMedians::usage="MethodOfMedians[data,\!\(w\_0:=1\)] estimates m, the
expected number of mutations, using the method of medians proposed by Lea and
Coulson. Note \!\(w\_0\) is an initial value needed to solve a transcendental
equation by Newton's method.  If only the median of the data is known,
say med, MethodOfMedians[med] also returns an estimate of m."

semiMLE::usage="semiMLE[data,\!\(m\_0:=1\)] computes an estimate
of m by solving \!\(\[Sum]Y\_i=0\), where \!\(Y\_i\) are the
Lea-Coulson transforms of the original data. Note that
\!\(m\_0\) is a starting value used to solve the equation."

CIMutationRate::usage="CIMutationRate[data,\!\(\[Alpha],N\_T,k\)] constructs
a \!\(\[Alpha]\) level confidence interval for \!\(\[Mu]\_\[Beta]\) using an
LD(m,1) distribution. It uses k terms in computing the Fisher information for
m. It is important to choose a suitable k. Note \!\(N\_T\) is the number of
nonmutants prior to plating."

plotFit::usage="plotFit[data,opts] fits either an LD(m,1) or an
M(m,\!\(\[Rho]\),1) distribution to data, and then plots the fitted distribution
along with the data. Use options for different ploting styles."

Begin["`Private`"]  
(* Fisher's information I(m) for an LD(m,phi) *)
(* revised October 30, 2001 *)
FisherInfoLD[m_, phi_:1, n_] := Module[{l, p, f}, 
     l = Table[phi^(k-1)*(1/k - phi/(k + 1)), {k, 1, n}];
     l = Prepend[l, -1]; 
     p = pmfLD[m, phi, n];
     If[Last[p] == 0, Return[Print["n is too large \!\(p\_n=0\) encountered."]]]; 
     f = $luria`$pdfconv[p, N[l]];
     Plus @@ (f^2/p)]/;(m>0)&&(phi>0)&&(phi<=1)&&(n>0)

(* Fisher's information I(m) for an M(m,rho,phi) distribution *)
FisherInfoM[m, 1, phi_, n_Integer]:=FisherInfoLD[m,phi,n]

FisherInfoM[m_, r_, 1, n_Integer] := $luria`$kochinfo[N[m],N[r],n]

FisherInfoM[m_, r_, n_Integer] := FisherInfoM[m,r,1,n]

FisherInfoM[m_, r_, phi_, n_Integer] := Module[{l, p, f, omega}, 
     omega = 1 - (1 - phi)^r;
     l = (Beta[omega, #1, 1 + 1/r] & ) /@ Range[n]; 
     l = Prepend[l/r/phi, -1];
     p = pmfM[m, r, phi, n];
     f = $luria`$pdfconv[l, p]; 
     Plus @@ (f^2/p)]/;(m>0)&&(r>0)&&(phi>0)&&(phi<1)&&(n>0)

FisherInfoRho[m_, r_, n_Integer] := $luria`$iofrho[N[m],N[r],n]
 
(* FisherInfoLDLC[m_/;Positive[m],n_Integer]/;n>0:= $luria`$fishinfo[N[m],n]*)

FisherInfoMLea[m_,r_,n_]:=$luria`$fisherho[N[m],N[r],n] 

(* simulating the number of mutants in a sister culture *)
simuMutants[mu_, b1_, b2_, N0_, Nt_] := 
    Module[{meanMutation, noOfMutation, T, mutationEpoch},
      T = Log[Nt/N0]/b1;
      meanMutation = mu*N0*(Exp[b1*T] - 1)/b1; 
      noOfMutation = Random[PoissonDistribution[meanMutation]]; 
      mutationEpoch = Table[Log[1 + Random[]*(Exp[b1*T] - 1)]/b1,{noOfMutation}];
      Plus @@ (Random[GeometricDistribution[Exp[-b2*(T - #1)]]] + 1 & ) /@ 
        mutationEpoch]

(* mean number of mutants *)
MeanOfMutants[mu_, b1_, b2_, N0_, Nt_] := Module[{t},
   t = Log[Nt/N0]/b1;
   If[b1 == b2, mu*N0*t*Exp[b1*t], 
       mu*N0*(Exp[b1*t] - Exp[b2*t])/(b1 - b2)]]
 
(* variance of the number of mutants *) 
VarOfMutants[mu_, b1_, b2_, N0_, Nt_] := Module[{t},
    t = Log[Nt/N0]/b1;
    If[b1 == b2, 
       2*mu*N0*Exp[b1*t]*(Exp[b1*t] - 1)/b1 - mu*N0*t*Exp[b1*t], 
       If[b1 == 2*b2, mu*N0*Exp[b2*t]*(1 - Exp[b2*t] + 2*b2*t*Exp[b2*t])/b2, 
        mu*N0*Exp[b2*t]*(b1*(1 + Exp[(b1 - b2)*t] - 2*Exp[b2*t]) + 
            2*b2*(Exp[b2*t] - 1))/(b1 - b2)/(b1 - 2*b2)]]]
 
(* Poisson distribution *)
pmfPoisson[lambda_ /; Positive[lambda], k_Integer /; NonNegative[k]] := 
    Exp[-N[lambda]]*FoldList[Times, 1, N[lambda]/Range[k]]

(* Luria-Delbruck distribution *)
pmfLD[m_/;Positive[m], phi_/;(phi>0)&&(phi<=1),
  k_Integer/;NonNegative[k]] := $luria`$pdfluria[N[m],N[phi],k]

pmfLD[m_, k_Integer]:=pmfLD[m,1,k]/;(m>0)&&(k>=0)

(* Cairnsian distribution *)
pmfCairns[m_, phi_, poi_, k_Integer] := Module[{LDProb, PoiProb}, 
     LDProb = $luria`$pdfluria[N[m], N[phi], k];
     PoiProb = pmfPoisson[N[poi],k];
     $luria`$pdfconv[LDProb,PoiProb]
]/;(m>0)&&(phi>0)&&(phi<=1)&&(poi>0)&&(k>=0)

(* the M distribution *)
pmfM[m_,r__,0]:={Exp[-N[m]]}

(* Koch's algorithm for the M(m,r,1) distribution, slow for large n *)
pmfMKoch[m_, r_, 0]:={Exp[-m]}
pmfMKoch[m_, r_, n_Integer] := Module[{q},
q=$luria`$pdfkoch[N[m],N[r],n];
Exp[-N[m]] Prepend[q,1]
]/;(m>0)&&(r>0)&&(n>0)

(* pmf for an M(m,r,1) distribution, using an algorithm like in Stewart et al. *)
pmfM[m_,r_,n_Integer]:=$luria`$kochstew[N[m],N[r],n]

pmfM[m_,r_,1,n_Integer]:=$luria`$kochstew[N[m],N[r],n]

(* exact algorithm of Stewart et al. *)
pmfM[m_, r_, phi_, n_Integer] := Module[
      {q, r1=1+1/r, omega=N[1-(1-phi)^r]},
       q = (Beta[omega, #1, r1] & ) /@ Range[n]; 
       q= q*m/r/phi;
       q = Prepend[q, -N[m]];
       $luria`$poisum[q]
]/;(phi>0)&&(phi<1)&&(m>0)&&(r>0)&&(n>0)

(* cumulative probabilities *)
pmf2cdf[l_List] := FoldList[Plus, First[l], Rest[l]] /; Length[l] > 0

(* the P0 method *)
MethodOfP0[data_] := Module[{p0},
      p0 = Count[data, 0]/Length[data]; 
      If[p0 > 0, -Log[N[p0]], Return[ Print[
        "The \!\(P\_0 \) method is not applicable to this case."];Null]]
]

(* Luria and Delbruck's original method of means *) 
MethodOfMeans[data_List, y0_:10] := Module[{n, M, y, t}, 
     n = Length[data];
     M = Plus @@ data;
     y = FixedPoint[Function[t, t - (t*Log[t] - N[M])/(Log[t] + 1)], y0]; 
     y/n]

(* method of median *)
MethodOfMedians[med_?NumericQ,y0_:1] := Module[{y},
      If[med==0,Exp[-1.24], 
        y=FixedPoint[Function[t,t-(t+Log[t]-1.24-Log[med])/(1 + 1/t)],y0];
        med/y]
]

MethodOfMedians[data_List,y0_:1] := MethodOfMedians[Median[data],y0]

(* the semi-MLE of Lea and Coulson *)
semiMLE[data_,m0_:1] := Module[{m,root}, 
     root = FindRoot[leaLike[m, data], {m, m0}];
     root[[1]][[2]]
]

(* MLE for m using an LD(m,phi) distribution, phi is assumed to be known *)
mleLD[data_, phi_:1, M_:50] := Module[{m, opt}, 
     opt = FindMinimum[LDLikelihood[m, phi, data], {m, {0.0001, M}}]; 
     opt[[2]][[1]][[2]]
]

(* MLE for both m and r, using an M(m,r,1) distribution *)
mleM[data_, M_:50, R_:5] := Module[{m, r, opt}, 
     opt = FindMinimum[MLikelihood[m, r, data], {m, 0.001, M}, {r, 0.01, R}];
    {opt[[2]][[1]][[2]], opt[[2]][[2]][[2]]}]

(* confidence interval for mutation rate with an LD(m,1) model *)
CIMutationRate[data_, level_, Nt_, k_:1000] := Module[{m, z, d, f0, f1}, 
     z = Statistics`DescriptiveStatistics`Quantile[
         Statistics`NormalDistribution`NormalDistribution[0, 1], (1 - level)/2]; 
     m = mleLD[data];
     f0 = FisherInfoLD[m, 1, k]; 
     f1 = FisherInfoLD[m, 1, k + 1000];
     If[f1 - f0 > 0.001, Return[Print["Specified k is too small."]]];
     d = z/Sqrt[Length[data]*f1]; 
     {m + d, m - d}/Nt]

(* plot the fitting, May 11, 2001 *)

plotFit[data_List, opts___] := Module[{distinctPt, freq, cumuFreq, g1, g2, m, 
      r, prob, koch, pointStyle, lineStyle, useLog, rescale}, 
     koch = DifferentialGrowth /. {opts} /. Options[plotFit]; 
      pointStyle = dataStyle /. {opts} /. Options[plotFit]; 
      lineStyle = fitStyle /. {opts} /. Options[plotFit]; 
      useLog = logPlot /. {opts} /. Options[plotFit]; 
      rescale = If[useLog, Log, Identity];
      distinctPt = Sort[Union[data]]; 
      freq = (Count[data, #1] & ) /@ distinctPt; 
      cumuFreq = FoldList[Plus, First[freq], Rest[freq]]/Length[data]; 
      g1 = ListPlot[Transpose[{rescale[distinctPt], cumuFreq}], 
        PlotStyle -> Flatten[{pointStyle}],
        DisplayFunction -> Identity];
      If[koch, {m, r} = mleM[data], m = mleLD[data]]; 
      If[koch, prob = pmfM[m, r, Max[data]], prob = pmfLD[m, 1, Max[data]]]; 
      prob = pmf2cdf[prob][[Range[1+Min[data], 1+Max[data]]]]; 
      g2 = ListPlot[Transpose[{rescale[Range[Min[data], Max[data]]], prob}], 
        PlotJoined -> True, PlotStyle -> Flatten[{lineStyle}],
        DisplayFunction -> Identity];
      Show[g1, g2, PlotRange -> All, Frame -> True, 
       DisplayFunction -> $DisplayFunction, Axes -> None]]
 

(* ============== internal functions ============== *)
LDLikelihood[m_,phi_,data_List] := Module[{prob, likelihood}, 
     prob = $luria`$pdfluria[N[m], N[phi], Max[data]];
     likelihood = (-Log[prob[[#1 + 1]]] & ) /@ data;
     Plus @@ likelihood]

MLikelihood[m_,r_,data_List] := Module[{prob, likelihood}, 
     prob = pmfM[N[m], N[r], Max[data]];
     likelihood = (-Log[prob[[#1 + 1]]] & ) /@ data;
     Plus @@ likelihood]

(* -log of likelihood function using the Lea-Coulson normal approximation *)
leaLike[m_, data_] := Plus@@((11.6/(#1/m - Log[m] + 4.5) - 2.02)&/@data)

End[]  
EndPackage[]