(* :Author A.I. McLeod *) (* :http://www.stats.uwo.ca/mcleod *) (* :aim@uwo.ca *) (* :Summary: An important statistical assumption in many statistical models such as in regression and time series arma models is that the residual variance is constant. A common type of inadequacy, known as monotone spread, occurs when the variability increases or decreases monotonically with the fitted value. This inadequacy can usually be rectified by choosing a suitable power or log transformation for the dependent variable. To detect monotone spread, most books on regression recommend plotting the residuals vs fits and looking for a funnel or fan shape. However, Cleveland (1979) pointed out that a funnel shape can arise simply as an artifact to there being more data with high values. Instead Cleveland (1994) recommends plotting the square root of the absolute residual vs fit and then using a nonparameric smoother, robust locally weighted regression, to see if there is a trend up or down. Absolute residual measure the spread. The rationale behind the square root transformation is that if the errors are Gaussian then the skewness of the square root absolute error is almost zero. In McLeod (1999) I showed that if the errors are from some distribution which thicker tails than the normal, then there are better choices than a square root transformation for the absolute residual. McLeod (1999) suggests an improved sl-plot visualization. Two plots are produced. First, a sl-plot of the p-th power absolute residuals vs fits with robust loess smooth. And second, a boxplot of the deviations from the robust loess smooth in the sl-plot. If the boxplot, indicates symmetry then the sl-plot is ok. Otherwise, try a different p-th power. Note p=0 is log. REFERENCES Cleveland, W.S. (1979), Robust locally weighted regression and smoothing scatterplots, Journal of the American Statistical Association, 74, 829-836. Cleveland, W.S. (1993), Visualizing Data. Summit: Hobart Press. McLeod, A.I. (1999), Improved spread-location visualization, Journal of Computational and Graphical Statistics, 8. *) BeginPackage["SLPlot`"] SLPlot::usage = "SLPlot[fits, res, p, span] produces a spread-location plot of the p-th power absolute residual, res, vs the fitted values, fits. The purpose of the SLPlot is to check for monotonic spread in the residuals. The default value for p is 0.5. A robust locally linear smooth with span is shown on the plot. The default value for span is 1.0. SLPlot takes the option BoxPlot which has a default value of False. If BoxPlot->True a second graphic is also produced showing the boxplot of the deviations in the slplot from the smoother. The purpose of the boxplot is to help to check for symmetry of the deviations about the loess trend line. If the deviations are not symmetric then this can usually be corrected by using a different value of p. The visualization of the sl-plot works best if p is chosen so the boxplot indicates symmetry. In most cases, span=1 is suitable."; Options[SLPlot] = {BoxPlot->False}; Begin["`Private`"] Needs["Statistics`ContinuousDistributions`"]; Needs["Statistics`DescriptiveStatistics`"]; NumberVectorQ[x_] := VectorQ[x] && And @@ NumberQ /@ x; DataRange[x_] := {Min[x], Max[x]}; LoessFit[(x_)?NumberQ, data_, span_:0.75, \[Lambda]_:1] := WLSFit[data, LoessWts[x, data, span], \[Lambda], x]; LoessFit[(x_)?VectorQ, data_, span_:0.75, \[Lambda]_:1] := Table[LoessFit[x[[i]], data, span, \[Lambda]], {i, Length[x]}]; SLPlot[fits_?NumberVectorQ, res_?NumberVectorQ, p_:0.5, span_:1, (opts___)?OptionQ]/;(span>0 && NumberQ[p]):= Module[{a, f, r, data2, s, lines,indboxplot}, indboxplot = BoxPlot /. {opts} /. Options[SLPlot]; data2 = Sort[Transpose[{fits, res}], First[#1] < First[#2] & ]; {f, r} = Transpose[data2]; a = If[p == 0, Log[Abs[r]], Abs[r]^p]; data2 = Transpose[{f, a}]; s = RobustLoessFits[data2, span]; lines = Line[Transpose[{f, s}]]; ListPlot[data2, PlotRange -> All, Axes -> False, Frame -> True, FrameLabel -> {"fit", "Abs[res]^p"}, PlotStyle -> {PointSize[0.02], RGBColor[0, 0, 1]}, Epilog -> {RGBColor[0, 1, 0], Thickness[0.02], lines}]; If[indboxplot,BWPlot[a - s]]]; RobustLoessFits[data_, span_:0.75, \[Lambda]_:1, (opts___)?OptionQ] := Module[{x, y, \[Delta], res, rsum = 0, data2, iter = 0, rprev = 1, r = Table[1, {Length[data]}]}, maxiter = MaxIterations /. {opts} /. Options[RobustLoessFits]; data2 = Sort[data, First[#1] < First[#2] & ]; {x, y} = Transpose[data2]; \[Delta] = Table[LoessWts[x[[i]], data, span], {i, 1, Length[x]}]; While[++iter < maxiter && Abs[rsum - rprev] > 0.001, res = Table[y[[i]] - WLSFit[data, \[Delta][[i]]*r, \[Lambda], x[[i]]], {i, 1, Length[x]}]; r = BiSquare[res/(6*Median[Abs[res]])]; rprev = rsum; rsum = Abs[Plus @@ r; ]]; y - res]; Options[RobustLoessFits] = {MaxIterations -> 25}; LoessSummary[data_, span_:0.75, \[Lambda]_:1] := With[{res = LoessResiduals[data, span, \[Lambda]]}, {res -> res, \[Sigma] -> Sqrt[Plus @@ (res^2)/Length[res]], \[Mu] -> 1.199999999999999*(\[Lambda] + 1)/span}]; LoessResiduals[data_, span_:0.75, \[Lambda]_:1] := Last[Transpose[data]] - LoessFit[First[Transpose[data]], data, span, \[Lambda]]; LoessScatterPlot[data_, span_:0.6, \[Lambda]_:1, opts___] := Module[{x, y, fits, data2, lines}, data2 = Sort[data, First[#1] < First[#2] & ]; {x, y} = Transpose[data2]; fits = RobustLoessFits[data2, span, \[Lambda]]; lines = Line[Transpose[{x, fits}]]; ListPlot[data2, PlotRange -> All, Axes -> False, Frame -> True, PlotStyle -> {PointSize[0.02], RGBColor[0, 0, 1]}, Epilog -> {RGBColor[0, 1, 0], Thickness[0.02], lines}]]; LoessPlot[data_, span_:0.75, \[Lambda]_:1, numvalues_:30, opts___] := With[{x = EquispaceVector[First[Transpose[data]], numvalues]}, ListPlot[data, opts, PlotStyle -> {PointSize[0.05]}, PlotRange -> ScaleRectangle[data], Frame -> True, Axes -> False, Epilog -> {Thickness[0.02], RGBColor[0, 1, 1], Line[Transpose[{x, LoessFit[x, data, span, \[Lambda]]}]]}]]; WLSFit[data_, wts_, ldegree_:1, x_] := Fit[Transpose[(wts*#1 & ) /@ Join[{Table[1, {Length[data]}]}, Transpose[data]]], Join[{u}, Table[v^i, {i, ldegree}]], {u, v}] /. {u -> 1, v -> x}; LoessWts[x_, data_, span_] := Tricube[(x - First[Transpose[data]])/LoessDistance[x, data, span]]; Tricube = Compile[{{x, _Real, 1}},If[Abs[#]<1, (1-#^2)^2, 0]& /@x]; Bisquare = Compile[{{x, _Real, 1}}, If[Abs[#]<1, (1-Abs[#]^3)^3, 0]& /@x]; LoessDistance[x_, data_, span_] := Module[{A = Max[1, span], X = First[Transpose[data]], q}, q = Min[Length[X], Ceiling[span*Length[X]]]; A*Sort[Abs[X - x]][[q]]]; EquispaceVector[(x_)?VectorQ, numvalues_:30] := Range[Min[x], Max[x], N[(Max[x] - Min[x])/numvalues]]; ScaleRectangle[data_] := With[{x = Transpose[data]}, (AddEps[#1] & ) /@ {DataRange[First[x]], DataRange[Last[x]]}]; AddEps[{xlo_, xhi_}] := With[{\[Epsilon] = (xhi - xlo)*0.05}, {xlo - \[Epsilon], xhi + \[Epsilon]}]; BWPlot[data_] := Module[{box, boxwidth = 0.4, coldata,datadim, datapts, dmax, dmin, dnadj, dnlim,drange, epsilon, j, jitter, k, outside, outsidepts, medpt, Q1, Q3, step, upadj, uplim, whiskers}, datadim = Dimensions[data]; k = If[Length[datadim] == 2, datadim[[2]], 1]; datapts = outsidepts = whiskers = box = medpt = dmax = dmin = {}; Do[coldata = If[k == 1, data, Column[data, i]]; datapts = Join[datapts, Transpose[{coldata, Table[i, {Length[coldata]}]}]]; medpt = Join[medpt, {PointSize[0.04], Point[{Quantile[coldata, 0.5], i}]}]; Q1 = Quantile[coldata, 0.25]; Q3 = Quantile[coldata, 0.75]; box = Join[box, {RGBColor[0.690207, 0.7685929999999999, 0.870602], Polygon[{{Q1, i - boxwidth}, {Q1, i + boxwidth}, {Q3, i + boxwidth}, {Q3, i - boxwidth}}]}]; step = 1.5*(Q3 - Q1); uplim = Q3 + step; dnlim = Q1 - step; upadj = Max[Select[coldata, #1 <= uplim & ]]; dnadj = Min[Select[coldata, #1 >= dnlim & ]]; whiskers = Join[whiskers, {Thickness[0.005], Line[{{dnadj, i}, {Q1, i}}], Line[{{Q3, i}, {upadj, i}}]}]; outside = Join[Select[coldata, #1 > uplim & ], Select[coldata, #1 < dnlim & ]]; jitter = Table[i, {Length[outside]}]; outsidepts = Join[outsidepts, Point[#1]& /@ Transpose[{outside, jitter}]]; dmax = Max[dmax, Max[coldata]]; (dmin = Min[dmin, Min[coldata]]; ), {i, k}]; epsilon = (dmax - dmin)*0.03; drange = {dmin - epsilon, dmax + epsilon}; ListPlot[datapts, Ticks -> {Automatic, None}, PlotStyle -> {AbsolutePointSize[0]}, PlotRange -> {drange, {0, k + 1}}, Axes -> {True, False}, Epilog -> {box, medpt, whiskers, {PointSize[0.03],RGBColor[0.7,0.1,0],outsidepts}}]]; End[ ] EndPackage[ ]