(* : Title: BootStrapPackage *) (* : Author: Enis Siniksaran *) (* : Mathematica Version: 4.0 *) (* : Date: June,2001 *) (* : Summary : This package is designed to produce some summary statistics and confidence intervals based on bootstrap percentiles and BCa percentiles for the mean, simple linear regression and correlation coefficient. *) BeginPackage["BootStrapPackage`"] BootMean::usage = "BootMean[list,B] returns a table which shows some summary statistics including bootstrap estimate of mean, standard error and bias, confidence intervals based on bootstrap percentiles and BCa percentiles. B which is the number of bootstrap samples must be an integer.It also gives a histogram of B bootstrap replications of the mean. Dashing line indicates the mean of the original sample"; BootRegPair::usage = "BootRegPair[data,B] finds a bootstrap fit for simple linear regression model.The method is based on the bootstrapping pairs.The data must have the form {{x1, y1 },{x2, y2}, ... ,{xk , yk}}. B which is the number of bootstrap samples must be an integer.BootRegPair[list,B] returns a table which shows summary statistics for model parameters beta0 and beta1.It also gives histograms of B bootstrap replications of the estimates of the model parameters. Dashing lines indicate the estimates of the beta0 and beta1 for the original data"; BootRegRes::usage = "BootRegRes[data,B] finds a bootstrap fit for simple linear regression model.The method is based on the bootstrapping residuals.The data must have the form {{x1, y1 },{x2, y2}, ... ,{xk , yk}}. B which is the number of bootstrap samples must be an integer.BootRegRes[list,B] returns a table which shows summary statistics for model parameters beta0 and beta1.It also gives histograms of B bootstrap replications of the estimates of the model parameters. Dashing lines indicate estimates of the beta0 and beta1 for the original data"; BootCorr::usage ="BootCorr[data,B] returns a table which shows some summary statistics including bootstrap estimate of correlation coefficient, standard error and bias, confidence intervals based on bootstrap percentiles and BCa percentiles.The data must have the form {{x1, y1 },{x2, y2}, ... ,{xk , yk}}.B which is the number of bootstrap samples must be an integer.It also gives a histogram of B bootstrap replications of the correlation coefficient.Dashing line indicates the estimate of the correlation coefficient for the original data"; Begin["`Private`"] Needs["DiscreteMath`Combinatorica`"] Needs["Statistics`MultiDescriptiveStatistics`"] Needs["Statistics`NormalDistribution`"] Needs["Statistics`LinearRegression`"] Needs["Graphics`Graphics`"] (* Bootstrapping The Mean *) BootMean[list_List, B_Integer?Positive] := Module[{eboos, eboot, i, means, meanboot, k, bias, seobserved, seboot, meanobserved, zetzero, alphahat, p, ones, ejack, ejacks, mjack, meanjacks, percentiles, BCas, hist}, k = Length[list]; Do[eboot[i] = Table[RandomKSubset[list, 1], {k}], {i, 1, B}]; eboos = Table[eboot[i], {i, 1, B}]; means = Table[Mean[Flatten[eboos[[i]], 1]], {i, 1, B}]; meanboot = N[Mean[means]]; seboot = N[StandardDeviation[means]]; meanobserved = N[Mean[list]]; biasmean = meanobserved - meanboot; seobserved = (StandardDeviation[list]/ Sqrt[Length[list]])*(Sqrt[Length[list] - 1]/ Sqrt[Length[list]]) // N; p = {.005, .025, .05, .95, .975, .995}; percentiles = Table[N[Quantile[means, p[[i]]]], {i, 1, 6}]; Do[ejack[i] = Drop[list, {i}], {i, 1, k}]; ejacks = Table[ejack[i], {i, 1, B}]; meanjacks = N[Table[Mean[ejacks[[i]]], {i, 1, k}]]; mjack = N[Mean[meanjacks]]; ones = Table[1, {i, 1, k}]; alphahat = ((mjack - meanjacks)^3).ones/(6*(((mjack - meanjacks)^2).ones)^(3/2)); zetzero = N[Quantile[NormalDistribution[0, 1], N[Length[Select[means, # < Mean[list] &]]/B]]]; BCas = Table[N[Quantile[means, N[CDF[NormalDistribution[0, 1], zetzero + ((zetzero + N[Quantile[NormalDistribution[0, 1], p[[i]]]])/ (1 - alphahat*(zetzero + N[Quantile[NormalDistribution[0, 1], p[[i]]]])))]]]], {i, 1, 6}]; Print["*", "*", "Bootstrap Results:", B "Replications", "*", "*"]; Print["Summary Statistics"]; Print[ TableForm[{{"meanobserved", "meanboot", "bias", "seobserved", "seboot"}, {meanobserved, meanboot, biasmean, seobserved, seboot}}]]; Print["Bootstrap Percentiles"]; Print[ TableForm[{{"0.5%", "2.5%", "5%", "95%", "97.5%", "99.5%"}, percentiles}]]; Print["BCa Percentiles"]; Print[ TableForm[{{"0.5%", "2.5%", "5%", "95%", "97.5%", "99.5%"}, BCas}]]; hist = Histogram[means, BarStyle -> {RGBColor[1, 1, 1]}, Epilog -> {Dashing[{.01}], Line[{{ meanobserved, 0}, { meanobserved, B}}]}, AxesLabel -> {"Bootstrap Means", "Frequencies"}, ImageSize -> {500, 200}]; ]; (* Bootstrapping Regression:Pairs *) BootRegPair[data_, B_Integer?Positive] := Module[{regress, bestfitparameters, beta0, beta1, popboot, eboot, i, eboos, regressboot, parameterestimates, pairbeta0, pairbeta1, regressjack, parameterestimatesboot, beta0boots, beta1boots, beta0boot, beta1boot, sebootbeta0, sebootbeta1, ejack, ejacks, parameterestimatesjack, beta0jack, beta1jack, beta0jacks, beta1jacks, k, p, ones, alphahat, zetzero, BCas, histbeta0, histbeta1}, regress = Regress[data, {1, x}, x, RegressionReport -> {BestFitParameters}];; bestfitparameters = BestFitParameters /. regress; beta0 = bestfitparameters[[1]]; beta1 = bestfitparameters[[2]]; popboot = Flatten[Join[Table[data, {i, 1, Length[data]}]], 1]; Do[eboot[i] = RandomKSubset[popboot, Length[data]], {i, 1, B}]; eboos = Table[eboot[i], {i, 1, B}]; regressboot = Table[Regress[eboos[[i]], {1, x}, x, RegressionReport -> {BestFitParameters}], {i, 1, B}]; parameterestimates = (bestfitparameters = BestFitParameters /. regressboot); beta0boots = Transpose[parameterestimates][[1]]; beta1boots = Transpose[parameterestimates][[2]]; beta0boot = Mean[beta0boots]; beta1boot = Mean[beta1boots]; sebootbeta0 = StandardDeviation[beta0boots]; sebootbeta1 = StandardDeviation[beta1boots]; biasbeta0 = beta0boot - beta0; biasbeta1 = beta1boot - beta1; k = Length[data]; Do[ejack[i] = Drop[data, {i}], {i, 1, k}]; ejacks = Table[ejack[i], {i, 1, k}]; regressjack = Table[Regress[ejacks[[i]], {1, x}, x, RegressionReport -> {BestFitParameters}], {i, 1, k}]; parameterestimatesjack = (bestfitparameters = BestFitParameters /. regressjack); beta0jacks = Transpose[parameterestimatesjack][[1]]; beta1jacks = Transpose[parameterestimatesjack][[2]]; beta0jack = Mean[beta0jacks]; beta1jack = Mean[beta1jacks]; p = {.005, .025, .05, .95, .975, .995}; percentilesbeta0 = Table[N[Quantile[beta0boots, p[[i]]]], {i, 1, 6}]; percentilesbeta1 = Table[N[Quantile[beta1boots, p[[i]]]], {i, 1, 6}]; ones = Table[1, {i, 1, k}]; alphahatbeta0 = ((beta0jack - beta0jacks)^3).ones/(6*(((beta0jack - beta0jacks)^2).ones)^(3/2)); zetzerobeta0 = N[Quantile[NormalDistribution[0, 1], N[Length[Select[beta0boots, # < beta0 &]]/B]]]; BCasbeta0 = Table[N[Quantile[beta0boots, N[CDF[NormalDistribution[0, 1], zetzerobeta0 + ((zetzerobeta0 + N[Quantile[NormalDistribution[0, 1], p[[i]]]])/ (1 - alphahatbeta0*(zetzerobeta0 + N[Quantile[NormalDistribution[0, 1], p[[i]]]])))]]]], {i, 1, 6}]; alphahatbeta1 = ((beta1jack - beta1jacks)^3).ones/(6*(((beta1jack - beta1jacks)^2).ones)^(3/2)); zetzerobeta1 = N[Quantile[NormalDistribution[0, 1], N[Length[Select[beta1boots, # < beta1 &]]/B]]]; BCasbeta1 = Table[N[Quantile[beta1boots, N[CDF[NormalDistribution[0, 1], zetzerobeta1 + ((zetzerobeta1 + N[Quantile[NormalDistribution[0, 1], p[[i]]]])/ (1 - alphahatbeta1*(zetzerobeta1 + N[Quantile[NormalDistribution[0, 1], p[[i]]]])))]]]], {i, 1, 6}]; Print["*", "*", "Bootstrap Results for Paired Regression:", B "Replications", "*", "*"]; Print["Summary Statistics"]; Print[ TableForm[{{"", "observed", "bootstrap", "bias", "seboot"}, {"beta0", beta0, beta0boot, biasbeta0, sebootbeta0}, {"beta1", beta1, beta1boot, biasbeta1, sebootbeta1}}]]; Print["Bootstrap Percentiles"]; Print[ TableForm[{{"","0.5%", "2.5%", "5%", "95%", "97.5%", "99.5%"}, Prepend[percentilesbeta0,"beta0"], Prepend[percentilesbeta1,"beta1"]}]]; Print["BCa Percentiles"]; Print[ TableForm[{{"","0.5%", "2.5%", "5%", "95%", "97.5%", "99.5%"}, Prepend[BCasbeta0,"beta0"],Prepend[BCasbeta1,"beta1"]}]]; histbeta0 = Histogram[beta0boots, BarStyle -> {RGBColor[1, 1, 1]}, Epilog -> {Dashing[{.01}], Line[{{beta0, 0}, {beta0, B}}]}, AxesLabel -> {"Bootstrap Beta0", "Frequencies"}, ImageSize -> {500, 200}]; histbeta1 = Histogram[beta1boots, BarStyle -> {RGBColor[1, 1, 1]}, Epilog -> {Dashing[{.01}], Line[{{beta1, 0}, {beta1, B}}]}, AxesLabel -> {"Bootstrap Beta1", "Frequencies"}, ImageSize -> {500, 200}]; ]; (* Bootstrapping Regression:Residuals *) BootRegRes[data_, B_Integer?Positive] :=Module[{listx, listy, ones, datamatrix, regress, residuals , bestfitparameters, varyhat, beta0, beta1, k, i, eboot, eboos, yy, booty,g, beta0boots, beta1boots, beta0boot, beta1boot, sebootbeta0, sebootbeta1, biasbeta0, biasbeta1, regressjack, ejack, ejacks, parameterestimatesjack, beta0jack, beta1jack, beta0jacks, beta1jacks, p,alphahat, zetzero, BCas, histbeta0, histbeta1}, listx = Transpose[data][[1]]; listy = Transpose[data][[2]]; ones = Table[1, {i, 1, Length[listx]}]; datamatrix = Transpose[{ones, listx}]; regress = Regress[data, {1, x}, x, RegressionReport -> {FitResiduals, SinglePredictionCITable, BestFitParameters }]; residuals = FitResiduals /. regress; bestfitparameters = BestFitParameters /. regress; varyhat = Transpose[(SinglePredictionCITable /. regress)[[1]]][[2]]; beta0 = bestfitparameters[[1]]; beta1 = bestfitparameters[[2]]; k = Length[data]; Do[eboot[i] = Table[RandomKSubset[residuals, 1], {k}], {i, 1, B}]; eboos = Table[eboot[i], {i, 1, B}]; yy = Table[varyhat, {B}]; booty = yy + eboos; g = Inverse[ Transpose[datamatrix].datamatrix].(Transpose[datamatrix]. Transpose[booty]); beta0boots = Flatten[g[[1]]]; beta1boots = Flatten[g[[2]]]; beta0boot = Mean[beta0boots]; beta1boot = Mean[beta1boots]; sebootbeta0 = StandardDeviation[beta0boots]; sebootbeta1 = StandardDeviation[beta1boots]; biasbeta0 = beta0boot - beta0; biasbeta1 = beta1boot - beta1; Do[ejack[i] = Drop[data, {i}], {i, 1, k}]; ejacks = Table[ejack[i], {i, 1, k}]; regressjack = Table[Regress[ejacks[[i]], {1, x}, x, RegressionReport -> {BestFitParameters}], {i, 1, k}]; parameterestimatesjack = (bestfitparameters = BestFitParameters /. regressjack); beta0jacks = Transpose[parameterestimatesjack][[1]]; beta1jacks = Transpose[parameterestimatesjack][[2]]; beta0jack = Mean[beta0jacks]; beta1jack = Mean[beta1jacks]; p = {.005, .025, .05, .95, .975, .995}; percentilesbeta0 = Table[N[Quantile[beta0boots, p[[i]]]], {i, 1, 6}]; percentilesbeta1 = Table[N[Quantile[beta1boots, p[[i]]]], {i, 1, 6}]; ones = Table[1, {i, 1, k}]; alphahatbeta0 = ((beta0jack - beta0jacks)^3).ones/(6*(((beta0jack - beta0jacks)^2).ones)^(3/2)); zetzerobeta0 = N[Quantile[NormalDistribution[0, 1], N[Length[Select[beta0boots, # < beta0 &]]/B]]]; BCasbeta0 = Table[N[Quantile[beta0boots, N[CDF[NormalDistribution[0, 1], zetzerobeta0 + ((zetzerobeta0 + N[Quantile[NormalDistribution[0, 1], p[[i]]]])/ (1 - alphahatbeta0*(zetzerobeta0 + N[Quantile[NormalDistribution[0, 1], p[[i]]]])))]]]], {i, 1, 6}]; alphahatbeta1 = ((beta1jack - beta1jacks)^3).ones/(6*(((beta1jack - beta1jacks)^2).ones)^(3/2)); zetzerobeta1 = N[Quantile[NormalDistribution[0, 1], N[Length[Select[beta1boots, # < beta1 &]]/B]]]; BCasbeta1 = Table[N[Quantile[beta1boots, N[CDF[NormalDistribution[0, 1], zetzerobeta1 + ((zetzerobeta1 + N[Quantile[NormalDistribution[0, 1], p[[i]]]])/ (1 - alphahatbeta1*(zetzerobeta1 + N[Quantile[NormalDistribution[0, 1], p[[i]]]])))]]]], {i, 1, 6}]; Print["*", "*", "Bootstrap Results for Residual Regression:", B "Replications", "*", "*"]; Print["Summary Statistics"]; Print[ TableForm[{{"", "observed", "bootstrap", "bias", "seboot"}, {"beta0", beta0, beta0boot, biasbeta0, sebootbeta0}, {"beta1", beta1, beta1boot, biasbeta1, sebootbeta1}}]]; Print["Bootstrap Percentiles"]; Print[ TableForm[{{"","0.5%", "2.5%", "5%", "95%", "97.5%", "99.5%"}, Prepend[percentilesbeta0,"beta0"],Prepend[percentilesbeta1,"beta1"]}]]; Print["BCa Percentiles"]; Print[ TableForm[{{"","0.5%", "2.5%", "5%", "95%", "97.5%", "99.5%"}, Prepend[BCasbeta0,"beta0"],Prepend[BCasbeta1,"beta1"]}]]; histbeta0 = Histogram[beta0boots, BarStyle -> {RGBColor[1, 1, 1]}, Epilog -> {Dashing[{.01}], Line[{{beta0, 0}, {beta0, B}}]}, AxesLabel -> {"Bootstrap Beta0", "Frequencies"}, ImageSize -> {500, 200}]; histbeta1 = Histogram[beta1boots, BarStyle -> {RGBColor[1, 1, 1]}, Epilog -> {Dashing[{.01}], Line[{{beta1, 0}, {beta1, B}}]}, AxesLabel -> {"Bootstrap Beta1", "Frequencies"}, ImageSize -> {500, 200}]; ]; (* Bootstrapping The Correlation Coefficient *) BootCorr[data_,B_Integer?Positive]:= Module[{corr,k,i,eboos,bootmatrix,corrsboot,corrboot,sebootcorr,biascorr, ejack,ejacks, cojacks,corrjacks,corrjack,p,percentilescorr,ones, alphahat,zetzero,BCas,hist}, corr=N[Correlation[Transpose[data][[1]],Transpose[data][[2]]]]; k=Length[data]; Do[eboot[i]=Table[RandomKSubset[data,1],{k}],{i,1,B}]; eboos=Table[Flatten[Table[eboot[i],{i,1,B}][[i]],1],{i,1,B}]; bootmatrix=Table[CorrelationMatrix[eboos[[i]]],{i,1,B}]; corrboots=N[Transpose[Transpose[bootmatrix][[1]]][[2]]]; corrboot=N[Mean[corrboots]]; sebootcorr=N[StandardDeviation[corrboots]]; biascorr=corrboot-corr; Do[ejack[i]=Drop[data,{i}],{i,1,k}]; ejacks=Table[ejack[i],{i,1,k}]; cojacks=Table[CorrelationMatrix[ejacks[[i]]],{i,1,k}]; corrjacks=Transpose[Transpose[cojacks][[1]]][[2]]; corrjack=N[Mean[corrjacks]]; p={.005,.025,.05,.95,.975,.995}; percentiles=Table[N[Quantile[corrboots,p[[i]]]],{i,1,6}]; ones=Table[1,{i,1,k}]; alphahat=((corrjack- corrjacks)^3).ones/(6*(((corrjack-corrjacks)^2). ones)^(3/2)); zetzero= N[Quantile[NormalDistribution[0,1], N[Length[Select[corrboots,#{RGBColor[1,1,1]}, Epilog->{Dashing[{.01}],Line[{{corr,0},{corr,B}}]}, AxesLabel->{"Bootstrap Correlations","Frequencies"}, ImageSize->{500,200}]; ] End[] EndPackage[]