(* Package Name: salvador.m Author: Qi Zheng, National Center for Toxicological Research Starting date: March 14, 2000 Major revision: March 5, 2001 Refinement started: May 11, 2001 *) BeginPackage["salvador`", {"Statistics`DescriptiveStatistics`", "Statistics`DiscreteDistributions`", "Statistics`ContinuousDistributions`", "Statistics`Common`DistributionsCommon`"}] Options[plotFit] = {logPlot -> False, DifferentialGrowth -> False, dataStyle -> {RGBColor[1, 0, 0], PointSize[0.015]}, fitStyle -> {RGBColor[0, 1, 0], Thickness[0.006]} } pmfLD::usage="pmfLD[\!\(m,\[Phi],k\)] returns a list of probabilities \!\({p\_0,p\_1,..., p\_k}\) according to a Luria-Delbr\!\(\[UDoubleDot]\)uck distribution, LD(m,\!\(\[Phi]\)). Note pmfLD[m,k] is the same as pmfLD[m,1,k]." pmfM::usage="pmfM[m,\!\(\[Rho]\),k] returns a list of probabilities \!\({p\_0,p\_1,...,p\_k}\) according to an \!\(M(m,\[Rho],1)\) distribution; pmfM[m,\!\(\[Rho],\[Phi]\),n] returns the same list according to an \!\(M(m,\[Rho],\[Phi])\) distribution. Note m is the mean number of mutations, \!\(\[Rho]=\[Beta]\_2/\[Beta]\_1\) is the ratio of mutant growth rate to nonmutant growth rate, and \!\(\[Phi]=1-N\_0/N\_T\)." pmfMKoch::usage="pmfMKoch[m,\!\(\[Rho]\),k] computes \!\({p\_0,p\_1,...,p\_k}\) for an \!\(M(m,\[Rho],1)\) distribution, using Koch's algorithm. (This function is not recommended for large k.)" pmfPoisson::usage="pmfPoisson[\!\(\[Lambda]\),k] returns a list of probabilities \!\({p\_0,p\_1,...,p\_k}\) according to a Poisson(\!\(\[Lambda]\)) distribution." pmfCairns::usage="pmfCairns[\!\(m,\[Phi],\[Lambda],k\)] returns a list of probabilities \!\({p\_0,...,p\_k}\) according to the convolution of an LD(\!\(m,\[Phi]\)) distribution and a Poisson(\!\(\[Lambda]\)) distribution." pmf2cdf::usage="pmf2cdf[\!\({p\_0,p\_1,...,p\_k}\)] returns a list of cumulative probabilities according to the given list of probabilities \!\({p\_0,p\_1,...,p\_k}\)." simuMutants::usage="simuMutants[\!\(\[Mu],\[Beta]\_1,\[Beta]\_2, N\_0,N\_T\)] simulates the number of mutants in a sister culture. Note \!\(\[Mu]\) is the mutation rate per cell per unit time; \!\(\[Beta]\_1\) is growth rate for nonmutants; \!\(\[Beta]\_2\) is growth rate of mutants; \!\(N\_0\) is the initial population size; \!\(N\_T\) is the population size prior to plating." MeanOfMutants::usage="MeanOfMutants[\!\(\[Mu],\[Beta]\_1,\[Beta]\_2,N\_0,N\_T\)] computes the mean number of mutants on each sister culture." VarOfMutants::usage="VarOfMutants[\!\(\[Mu],\[Beta]\_1,\[Beta]\_2,N\_0,N\_T\)] computes the variance of the number of mutants on each sister culture." mleLD::usage="mleLD[data,\!\(\[Phi]:=1\),M:=50] searches in the interval [0.001,M] for the maximum likelihood estimate of m, the mean number of mutations, when data is assumed to be generated by an LD(m,\!\(\[Phi]\)) distribution. Note \!\(\[Phi]\) is considered as known." mleM::usage="mleM[data,M:50,R:5] searches for the maximum likelihood estimates of both m and \!\(\[Rho]\) by assuming that the data is generated by an M(m,\!\(\[Rho]\),1) distribution. The search is confined to \!\(m\[Element](0.001,M)\) and \!\(\[Rho]\[Element](0.01,R)\)." FisherInfoLD::usage="FisherInfoLD[m,\!\(\[Phi]\),k] computes the Fisher information for m according to an LD(m,\!\(\[Phi]\)) distribution; it retains k terms in the series \!\(I(m)=\[Sum] f\^2\%j/p\_j\)." FisherInfoM::usage="FisherInfoM[m,\!\(\[Rho],\[Phi]\),k] computes the Fisher information for m, I(m), according to an M(m,\!\(\[Rho],\[Phi]\)) distribution; it retains k terms in the series \!\(I(m)=\[Sum] f\^2\%j/p\_j\). Note that FisherInfoM[m,\!\(\[Rho]\),k] assumes \!\(\[Phi]=1\)." FisherInfoRho::usage="FisherInfoRho[m,\!\(\[Rho]\),k] computes the Fisher information for \!\(\[Rho]\), I(\!\(\[Rho]\)), using an M(m, \!\(\[Rho],1\)) distribution. It retains k terms in the series \!\(I(\!\(\[Rho]\))=\[Sum] f\^2\%j/p\_j\)." FisherInfoMLea::usage="FisherInfoMLea[m,\!\(\[Rho]\),k] computes the Fisher information for both m and \!\(\[Rho]\) according to an M(m,\!\(\[Rho]\),1) distribution by the algorithm proposed by Lea and Coulson. It uses k terms to approximate I(m) and I(\!\(\[Rho]\)). (This function is not recommended for large k.)" MethodOfP0::usage="MethodOfP0[data] estimates m, the expected number of mutations, using the \!\(P\_0\) method." MethodOfMeans::usage="MethodOfMeans[data,\!\(w\_0:=10\)] estimates m (the mean number of mutations) by the method of means as proposed by Luria and Delbr\!\(\[UDoubleDot]\)ck. Note \!\(w\_0\) is an initial value needed for numerically solving a transcendental equation." MethodOfMedians::usage="MethodOfMedians[data,\!\(w\_0:=1\)] estimates m, the expected number of mutations, using the method of medians proposed by Lea and Coulson. Note \!\(w\_0\) is an initial value needed to solve a transcendental equation by Newton's method. If only the median of the data is known, say med, MethodOfMedians[med] also returns an estimate of m." semiMLE::usage="semiMLE[data,\!\(m\_0:=1\)] computes an estimate of m by solving \!\(\[Sum]Y\_i=0\), where \!\(Y\_i\) are the Lea-Coulson transforms of the original data. Note that \!\(m\_0\) is a starting value used to solve the equation." CIMutationRate::usage="CIMutationRate[data,\!\(\[Alpha],N\_T,k\)] constructs a \!\(\[Alpha]\) level confidence interval for \!\(\[Mu]\_\[Beta]\) using an LD(m,1) distribution. It uses k terms in computing the Fisher information for m. It is important to choose a suitable k. Note \!\(N\_T\) is the number of nonmutants prior to plating." plotFit::usage="plotFit[data,opts] fits either an LD(m,1) or an M(m,\!\(\[Rho]\),1) distribution to data, and then plots the fitted distribution along with the data. Use options for different ploting styles." Begin["`Private`"] (* Fisher's information I(m) for an LD(m,phi) *) (* revised October 30, 2001 *) FisherInfoLD[m_, phi_:1, n_] := Module[{l, p, f}, l = Table[phi^(k-1)*(1/k - phi/(k + 1)), {k, 1, n}]; l = Prepend[l, -1]; p = pmfLD[m, phi, n]; If[Last[p] == 0, Return[Print["n is too large \!\(p\_n=0\) encountered."]]]; f = $luria`$pdfconv[p, N[l]]; Plus @@ (f^2/p)]/;(m>0)&&(phi>0)&&(phi<=1)&&(n>0) (* Fisher's information I(m) for an M(m,rho,phi) distribution *) FisherInfoM[m, 1, phi_, n_Integer]:=FisherInfoLD[m,phi,n] FisherInfoM[m_, r_, 1, n_Integer] := $luria`$kochinfo[N[m],N[r],n] FisherInfoM[m_, r_, n_Integer] := FisherInfoM[m,r,1,n] FisherInfoM[m_, r_, phi_, n_Integer] := Module[{l, p, f, omega}, omega = 1 - (1 - phi)^r; l = (Beta[omega, #1, 1 + 1/r] & ) /@ Range[n]; l = Prepend[l/r/phi, -1]; p = pmfM[m, r, phi, n]; f = $luria`$pdfconv[l, p]; Plus @@ (f^2/p)]/;(m>0)&&(r>0)&&(phi>0)&&(phi<1)&&(n>0) FisherInfoRho[m_, r_, n_Integer] := $luria`$iofrho[N[m],N[r],n] (* FisherInfoLDLC[m_/;Positive[m],n_Integer]/;n>0:= $luria`$fishinfo[N[m],n]*) FisherInfoMLea[m_,r_,n_]:=$luria`$fisherho[N[m],N[r],n] (* simulating the number of mutants in a sister culture *) simuMutants[mu_, b1_, b2_, N0_, Nt_] := Module[{meanMutation, noOfMutation, T, mutationEpoch}, T = Log[Nt/N0]/b1; meanMutation = mu*N0*(Exp[b1*T] - 1)/b1; noOfMutation = Random[PoissonDistribution[meanMutation]]; mutationEpoch = Table[Log[1 + Random[]*(Exp[b1*T] - 1)]/b1,{noOfMutation}]; Plus @@ (Random[GeometricDistribution[Exp[-b2*(T - #1)]]] + 1 & ) /@ mutationEpoch] (* mean number of mutants *) MeanOfMutants[mu_, b1_, b2_, N0_, Nt_] := Module[{t}, t = Log[Nt/N0]/b1; If[b1 == b2, mu*N0*t*Exp[b1*t], mu*N0*(Exp[b1*t] - Exp[b2*t])/(b1 - b2)]] (* variance of the number of mutants *) VarOfMutants[mu_, b1_, b2_, N0_, Nt_] := Module[{t}, t = Log[Nt/N0]/b1; If[b1 == b2, 2*mu*N0*Exp[b1*t]*(Exp[b1*t] - 1)/b1 - mu*N0*t*Exp[b1*t], If[b1 == 2*b2, mu*N0*Exp[b2*t]*(1 - Exp[b2*t] + 2*b2*t*Exp[b2*t])/b2, mu*N0*Exp[b2*t]*(b1*(1 + Exp[(b1 - b2)*t] - 2*Exp[b2*t]) + 2*b2*(Exp[b2*t] - 1))/(b1 - b2)/(b1 - 2*b2)]]] (* Poisson distribution *) pmfPoisson[lambda_ /; Positive[lambda], k_Integer /; NonNegative[k]] := Exp[-N[lambda]]*FoldList[Times, 1, N[lambda]/Range[k]] (* Luria-Delbruck distribution *) pmfLD[m_/;Positive[m], phi_/;(phi>0)&&(phi<=1), k_Integer/;NonNegative[k]] := $luria`$pdfluria[N[m],N[phi],k] pmfLD[m_, k_Integer]:=pmfLD[m,1,k]/;(m>0)&&(k>=0) (* Cairnsian distribution *) pmfCairns[m_, phi_, poi_, k_Integer] := Module[{LDProb, PoiProb}, LDProb = $luria`$pdfluria[N[m], N[phi], k]; PoiProb = pmfPoisson[N[poi],k]; $luria`$pdfconv[LDProb,PoiProb] ]/;(m>0)&&(phi>0)&&(phi<=1)&&(poi>0)&&(k>=0) (* the M distribution *) pmfM[m_,r__,0]:={Exp[-N[m]]} (* Koch's algorithm for the M(m,r,1) distribution, slow for large n *) pmfMKoch[m_, r_, 0]:={Exp[-m]} pmfMKoch[m_, r_, n_Integer] := Module[{q}, q=$luria`$pdfkoch[N[m],N[r],n]; Exp[-N[m]] Prepend[q,1] ]/;(m>0)&&(r>0)&&(n>0) (* pmf for an M(m,r,1) distribution, using an algorithm like in Stewart et al. *) pmfM[m_,r_,n_Integer]:=$luria`$kochstew[N[m],N[r],n] pmfM[m_,r_,1,n_Integer]:=$luria`$kochstew[N[m],N[r],n] (* exact algorithm of Stewart et al. *) pmfM[m_, r_, phi_, n_Integer] := Module[ {q, r1=1+1/r, omega=N[1-(1-phi)^r]}, q = (Beta[omega, #1, r1] & ) /@ Range[n]; q= q*m/r/phi; q = Prepend[q, -N[m]]; $luria`$poisum[q] ]/;(phi>0)&&(phi<1)&&(m>0)&&(r>0)&&(n>0) (* cumulative probabilities *) pmf2cdf[l_List] := FoldList[Plus, First[l], Rest[l]] /; Length[l] > 0 (* the P0 method *) MethodOfP0[data_] := Module[{p0}, p0 = Count[data, 0]/Length[data]; If[p0 > 0, -Log[N[p0]], Return[ Print[ "The \!\(P\_0 \) method is not applicable to this case."];Null]] ] (* Luria and Delbruck's original method of means *) MethodOfMeans[data_List, y0_:10] := Module[{n, M, y, t}, n = Length[data]; M = Plus @@ data; y = FixedPoint[Function[t, t - (t*Log[t] - N[M])/(Log[t] + 1)], y0]; y/n] (* method of median *) MethodOfMedians[med_?NumericQ,y0_:1] := Module[{y}, If[med==0,Exp[-1.24], y=FixedPoint[Function[t,t-(t+Log[t]-1.24-Log[med])/(1 + 1/t)],y0]; med/y] ] MethodOfMedians[data_List,y0_:1] := MethodOfMedians[Median[data],y0] (* the semi-MLE of Lea and Coulson *) semiMLE[data_,m0_:1] := Module[{m,root}, root = FindRoot[leaLike[m, data], {m, m0}]; root[[1]][[2]] ] (* MLE for m using an LD(m,phi) distribution, phi is assumed to be known *) mleLD[data_, phi_:1, M_:50] := Module[{m, opt}, opt = FindMinimum[LDLikelihood[m, phi, data], {m, {0.0001, M}}]; opt[[2]][[1]][[2]] ] (* MLE for both m and r, using an M(m,r,1) distribution *) mleM[data_, M_:50, R_:5] := Module[{m, r, opt}, opt = FindMinimum[MLikelihood[m, r, data], {m, 0.001, M}, {r, 0.01, R}]; {opt[[2]][[1]][[2]], opt[[2]][[2]][[2]]}] (* confidence interval for mutation rate with an LD(m,1) model *) CIMutationRate[data_, level_, Nt_, k_:1000] := Module[{m, z, d, f0, f1}, z = Statistics`DescriptiveStatistics`Quantile[ Statistics`NormalDistribution`NormalDistribution[0, 1], (1 - level)/2]; m = mleLD[data]; f0 = FisherInfoLD[m, 1, k]; f1 = FisherInfoLD[m, 1, k + 1000]; If[f1 - f0 > 0.001, Return[Print["Specified k is too small."]]]; d = z/Sqrt[Length[data]*f1]; {m + d, m - d}/Nt] (* plot the fitting, May 11, 2001 *) plotFit[data_List, opts___] := Module[{distinctPt, freq, cumuFreq, g1, g2, m, r, prob, koch, pointStyle, lineStyle, useLog, rescale}, koch = DifferentialGrowth /. {opts} /. Options[plotFit]; pointStyle = dataStyle /. {opts} /. Options[plotFit]; lineStyle = fitStyle /. {opts} /. Options[plotFit]; useLog = logPlot /. {opts} /. Options[plotFit]; rescale = If[useLog, Log, Identity]; distinctPt = Sort[Union[data]]; freq = (Count[data, #1] & ) /@ distinctPt; cumuFreq = FoldList[Plus, First[freq], Rest[freq]]/Length[data]; g1 = ListPlot[Transpose[{rescale[distinctPt], cumuFreq}], PlotStyle -> Flatten[{pointStyle}], DisplayFunction -> Identity]; If[koch, {m, r} = mleM[data], m = mleLD[data]]; If[koch, prob = pmfM[m, r, Max[data]], prob = pmfLD[m, 1, Max[data]]]; prob = pmf2cdf[prob][[Range[1+Min[data], 1+Max[data]]]]; g2 = ListPlot[Transpose[{rescale[Range[Min[data], Max[data]]], prob}], PlotJoined -> True, PlotStyle -> Flatten[{lineStyle}], DisplayFunction -> Identity]; Show[g1, g2, PlotRange -> All, Frame -> True, DisplayFunction -> $DisplayFunction, Axes -> None]] (* ============== internal functions ============== *) LDLikelihood[m_,phi_,data_List] := Module[{prob, likelihood}, prob = $luria`$pdfluria[N[m], N[phi], Max[data]]; likelihood = (-Log[prob[[#1 + 1]]] & ) /@ data; Plus @@ likelihood] MLikelihood[m_,r_,data_List] := Module[{prob, likelihood}, prob = pmfM[N[m], N[r], Max[data]]; likelihood = (-Log[prob[[#1 + 1]]] & ) /@ data; Plus @@ likelihood] (* -log of likelihood function using the Lea-Coulson normal approximation *) leaLike[m_, data_] := Plus@@((11.6/(#1/m - Log[m] + 4.5) - 2.02)&/@data) End[] EndPackage[]