(***********************************************************************

                    Mathematica-Compatible Notebook

This notebook can be used on any computer system with Mathematica 4.0,
MathReader 4.0, or any compatible application. The data for the notebook 
starts with the line containing stars above.

To get the notebook into a Mathematica-compatible application, do one of 
the following:

* Save the data starting with the line of stars above into a file
  with a name ending in .nb, then open the file inside the application;

* Copy the data starting with the line of stars above to the
  clipboard, then use the Paste menu command inside the application.

Data for notebooks contains only printable 7-bit ASCII and can be
sent directly in email or through ftp in text mode.  Newlines can be
CR, LF or CRLF (Unix, Macintosh or MS-DOS style).

NOTE: If you modify the data for this notebook not in a Mathematica-
compatible application, you must delete the line below containing the 
word CacheID, otherwise Mathematica-compatible applications may try to 
use invalid cache data.

For more information on notebooks and Mathematica-compatible 
applications, contact Wolfram Research:
  web: http://www.wolfram.com
  email: info@wolfram.com
  phone: +1-217-398-0700 (U.S.)

Notebook reader applications are available free of charge from 
Wolfram Research.
***********************************************************************)

(*CacheID: 232*)


(*NotebookFileLineBreakTest
NotebookFileLineBreakTest*)
(*NotebookOptionsPosition[     25516,        731]*)
(*NotebookOutlinePosition[     26296,        757]*)
(*  CellTagsIndexPosition[     26252,        753]*)
(*WindowFrame->Normal*)



Notebook[{

Cell[CellGroupData[{
Cell["Math 384", "Subtitle"],

Cell[CellGroupData[{

Cell["Broyden's method for root-finding", "Subsubsection",
  CellDingbat->None],

Cell[TextData[{
  "Broyden's method is an approximation of Newton's, where we use a rank-1 \
update at each step to approximate the gradient matrix. We now will \
approximate the action of the gradient matrix on the vector ",
  Cell[BoxData[
      \(\(\ x\&\[RightArrow]\^k\)\)],
    FontWeight->"Bold"],
  " - ",
  Cell[BoxData[
      \(\(x\&\[RightArrow]\^*\)\)],
    FontWeight->"Bold"],
  " where ",
  Cell[BoxData[
      \(\(x\&\[RightArrow]\^*\)\)],
    FontWeight->"Bold"],
  " is the root, that is, the point in ",
  Cell[BoxData[
      \(\[DoubleStruckCapitalR]\^n\)],
    FontWeight->"Bold"],
  " for which our function ",
  Cell[BoxData[
      StyleBox[\(g\&\[RightArrow]\),
        FontWeight->"Bold"]]],
  "[",
  Cell[BoxData[
      \(x\&\[RightArrow]\)],
    FontWeight->"Bold"],
  "] = ",
  Cell[BoxData[
      \(0\&\[RightArrow]\)],
    FontWeight->"Bold"],
  ". In the examples below we will apply this to minimization, so our vector \
function  ",
  Cell[BoxData[
      StyleBox[\(g\&\[RightArrow]\),
        FontWeight->"Bold"]]],
  "[",
  Cell[BoxData[
      \(x\&\[RightArrow]\)],
    FontWeight->"Bold"],
  "] will actually be the gradient of some ordinary function  f[",
  Cell[BoxData[
      \(x\&\[RightArrow]\)],
    FontWeight->"Bold"],
  "], and the matrix ",
  Cell[BoxData[
      \(\[Del]\)]],
  " ",
  Cell[BoxData[
      StyleBox[\(g\&\[RightArrow]\),
        FontWeight->"Bold"]]],
  "[",
  Cell[BoxData[
      \(x\&\[RightArrow]\)],
    FontWeight->"Bold"],
  "] will be the Hessian ",
  Cell[BoxData[
      \(H\_f[\(x\& \[Rule] \)]\)]],
  ". This may help to explain the naming of variables below."
}], "Text"],

Cell[BoxData[
    \(BroydenUpdate[initpoint_, \ vars_, \ grad_, hess_, \ 
        print_:  False]\  := \ 
      Module[{nextpoint, \ update, \ 
          delvec}, \[IndentingNewLine]nextpoint\  = \ 
          initpoint\  + \ 
            LinearSolve[
              hess, \(-grad\) /. 
                Thread[vars \[Rule] initpoint]]; \[IndentingNewLine]If\ [
          print, \ Print["\<next point: \>", \ 
            nextpoint]]; \[IndentingNewLine]delvec\  = \ \((nextpoint - 
              initpoint)\); \[IndentingNewLine]update\  = \ 
          Outer[Times, grad /. Thread[vars \[Rule] nextpoint], \ 
              delvec]\ /\ \((delvec . delvec)\); \[IndentingNewLine]If\ [
          print, \ Print["\<rank 1 update matrix: \>", \ 
            update]]; \[IndentingNewLine]nexthess\  = \ 
          hess\  + \ update; \[IndentingNewLine]{nextpoint, \ 
          hess + update}\[IndentingNewLine]]\)], "Input",
  PageBreakAbove->False],

Cell["\<\
Here is a quick example using three iterations. I am not using a quadratic \
function because we know that for such functions the first iteration (using \
Newton's method for that step) will get us right to the minimizer.\
\>", "Text",
  PageBreakAbove->False],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(fquartic[x_, y_] := x^4\  + \ 3*x*y\  + \ 3*y^4;\)\), 
  "\[IndentingNewLine]", 
    \(\(vars\  = \ {x, y};\)\), "\[IndentingNewLine]", 
    \(\(gradf\  = \ Map[D[fquartic[x, y], #] &, \ vars];\)\), 
  "\[IndentingNewLine]", 
    \(hessf\  = \ Outer[D, gradf, vars]\), "\[IndentingNewLine]", 
    \(\(pt\  = \ {1. , 1. };\)\), "\[IndentingNewLine]", 
    \(hess\  = \ hessf\  /. \ Thread[vars \[Rule] pt]\)}], "Input",
  PageBreakAbove->False],

Cell[BoxData[
    \({{12\ x\^2, 3}, {3, 36\ y\^2}}\)], "Output",
  PageBreakAbove->False],

Cell[BoxData[
    \({{12.`, 3}, {3, 36.`}}\)], "Output",
  PageBreakAbove->False]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(Do[{pt, \ hess}\  
        = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ gradf, hess, \ 
          True], \ {3}]\)], "Input",
  PageBreakAbove->False],

Cell[BoxData[
    InterpretationBox[
      \("next point: "\[InvisibleSpace]{0.5106382978723405`, 
          0.624113475177305`}\),
      SequenceForm[ "next point: ", {.5106382978723405, .62411347517730498}],
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("rank 1 update matrix: "\[InvisibleSpace]{{\(-3.090839265093622`\), 
            \(-2.374122913767565`\)}, 
          {\(-5.718072996681642`\), \(-4.392143026436625`\)}}\),
      SequenceForm[ 
      "rank 1 update matrix: ", {{-3.0908392650936221, -2.374122913767565}, {
        -5.7180729966816424, -4.3921430264366252}}],
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("next point: "\[InvisibleSpace]{0.2521484273498367`, 
          0.46112401168505757`}\),
      SequenceForm[ "next point: ", {.25214842734983672, .46112401168505757}]
      ,
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("rank 1 update matrix: "\[InvisibleSpace]{{\(-4.006779222696074`\), 
            \(-2.5264541102482485`\)}, 
          {\(-5.350854143395891`\), \(-3.373953665164758`\)}}\),
      SequenceForm[ 
      "rank 1 update matrix: ", {{-4.0067792226960739, -2.5264541102482485}, {
        -5.3508541433958907, -3.3739536651647581}}],
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("next point: "\[InvisibleSpace]{\(-0.10975637055728932`\), 
          0.2892298217323165`}\),
      SequenceForm[ "next point: ", {-.10975637055728932, .28922982173231648}]
      ,
      Editable->False]], "Print",
  PageBreakBelow->False],

Cell[BoxData[
    InterpretationBox[
      \("rank 1 update matrix: "\[InvisibleSpace]{{\(-1.9443168013799734`\), 
            \(-0.9234935914568438`\)}, 
          {0.08776182900944288`, 0.041684300936569905`}}\),
      SequenceForm[ 
      "rank 1 update matrix: ", {{-1.9443168013799734, -.92349359145684384}, {
        .087761829009442877, .041684300936569905}}],
      Editable->False]], "Print"]
}, Open  ]],

Cell[TextData[{
  "We will do 10 more iterations with printing suppressed, and see where we \
land. We will also evaluate our objective function there to see how small it \
is at that point. Then we will check the gradient to see if it is close to ",
  Cell[BoxData[
      \(0\&\[RightArrow]\)],
    FontWeight->"Bold"],
  "."
}], "Text",
  PageBreakAbove->False,
  PageBreakBelow->Automatic],

Cell[CellGroupData[{

Cell[BoxData[{
    \(Do[{pt, \ hess}\  
        = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ gradf, hess], \ {10}]
      \), "\[IndentingNewLine]", 
    \({pt, \ hess}\), "\[IndentingNewLine]", 
    \(Apply[fquartic, \ pt]\), "\[IndentingNewLine]", 
    \(gradf\  /. \ Thread[vars \[Rule] pt]\)}], "Input"],

Cell[BoxData[
    \({{\(-0.7549149681269997`\), 0.5735979088983`}, 
      {{6.925800715132349`, 2.506542851267094`}, 
        {3.071304298503061`, 11.443682651072614`}}}\)], "Output"],

Cell[BoxData[
    \(\(-0.6495190520421724`\)\)], "Output"],

Cell[BoxData[
    \({\(-0.00010019528935645283`\), \(-0.00008013350398261565`\)}\)], 
  "Output"]
}, Open  ]],

Cell[TextData[{
  "Looks good. Be aware that I did not make the code fail-safe. If I try 10 \
more iterations it will die a horrid death due to inability to handle \
degenerate matrices. In other words, once it gets too close to the critical \
point it should be made to cease and desist from further iterations.\n\n\
Notice that the Hessian is not always positive definite at all points ",
  Cell[BoxData[
      \({x, y}\)]],
  ".This can have an interesting effect on the sequence of points we obtain. \
If we start at ",
  Cell[BoxData[
      \({0, 2}\)]],
  " we will INCREASE in value for one iteration. Fortunately once ",
  Cell[BoxData[
      \(x\)]],
  " or ",
  Cell[BoxData[
      \(y\)]],
  " gets large the Hessian becomes positive definite provided the other \
coordinate is not too small. Moreover using Broyden's method tends initially \
to perturb us away from the actual Newton's sequence in this example, which \
appears to be beneficial; I did not try it, but I think Newton's method will \
diverge as it bounces from one coordinate axis to the other and back, getting \
ever farther from the minimizer which is located somewhere near the origin. \
In any case Broyden's method will take a very long time to converge with this \
particular starting point."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(fquartic[x_, y_] := x^4\  + \ 3*x*y\  + \ 3*y^4;\)\), 
  "\[IndentingNewLine]", 
    \(\(vars\  = \ {x, y};\)\), "\[IndentingNewLine]", 
    \(\(gradf\  = \ Map[D[fquartic[x, y], #] &, \ vars];\)\), 
  "\[IndentingNewLine]", 
    \(\(hessf\  = \ Outer[D, gradf, vars];\)\), "\[IndentingNewLine]", 
    \(\(pt\  = \ {0. , 2. };\)\), "\[IndentingNewLine]", 
    \(\(hess\  = \ hessf\  /. \ Thread[vars \[Rule] pt];\)\), 
  "\[IndentingNewLine]", 
    \(Do[{pt, \ hess}\  
        = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ gradf, hess, \ 
          True], \ {3}]\)}], "Input"],

Cell[BoxData[
    InterpretationBox[\("next point: "\[InvisibleSpace]{64.`, 0.`}\),
      SequenceForm[ "next point: ", {64.0, 0.0}],
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("rank 1 update matrix: "\[InvisibleSpace]{{16368.015609756098`, 
            \(-511.50048780487805`\)}, 
          {2.9970731707317073`, \(-0.09365853658536585`\)}}\),
      SequenceForm[ 
      "rank 1 update matrix: ", {{16368.015609756098, -511.50048780487805}, {
        2.9970731707317073, -.093658536585365854}}],
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("next point: "\[InvisibleSpace]{\(-0.021063921251368356`\), 
          1.33377725298251`}\),
      SequenceForm[ "next point: ", {-.021063921251368356, 1.33377725298251}]
      ,
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("rank 1 update matrix: "\[InvisibleSpace]{{\(-0.06247253947322751`\), 
            0.0013015162039158772`}, 
          {\(-0.443562570306292`\), 0.009240922132701475`}}\),
      SequenceForm[ 
      "rank 1 update matrix: ", {{-.062472539473227513, .0013015162039158772}
        , {-.44356257030629198, .0092409221327014752}}],
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("next point: "\[InvisibleSpace]{\(-0.027433480817310556`\), 
          1.1366179244964043`}\),
      SequenceForm[ 
      "next point: ", {-.027433480817310556, 1.1366179244964043}],
      Editable->False]], "Print"],

Cell[BoxData[
    InterpretationBox[
      \("rank 1 update matrix: "\[InvisibleSpace]{{\(-0.5581448654591324`\), 
            \(-17.276464052599295`\)}, 
          {\(-2.8708682027003216`\), \(-88.8630432224841`\)}}\),
      SequenceForm[ 
      "rank 1 update matrix: ", {{-.55814486545913244, -17.276464052599295}, {
        -2.8708682027003216, -88.863043222484094}}],
      Editable->False]], "Print"]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[{
    \(Do[{pt, \ hess}\  
        = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ gradf, hess], \ {20}]
      \), "\[IndentingNewLine]", 
    \({pt, \ hess}\), "\[IndentingNewLine]", 
    \(Apply[fquartic, \ pt]\), "\[IndentingNewLine]", 
    \(gradf\  /. \ Thread[vars \[Rule] pt]\)}], "Input",
  PageBreakAbove->False],

Cell[BoxData[
    \({{\(-0.06393775706776542`\), 0.2518379072289761`}, 
      {{8168.302393846676`, 6028.730190662048`}, 
        {1.8945498714004463`, 1.4505684716507954`}}}\)], "Output"],

Cell[BoxData[
    \(\(-0.03622196434854112`\)\)], "Output"],

Cell[BoxData[
    \({0.7544682020771258`, \(-0.00014750431199139769`\)}\)], "Output"]
}, Open  ]],

Cell["As I said, slow convergence along this route.", "Text",
  PageBreakBelow->True],

Cell["\<\
Here is an example we have played with before in this chapter. We will \
attempt to minimize the function f by finding a critical point of its \
gradient. There are better ways than to use Broyden's method, e.g. BFGS or \
Conjugate Gradient methods, but this is for purposes of illustration.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(f[x1_, x2_, x3_, x4_] := 
      3*x1^2*x2\  - \ x1*x2*x3\  + \ x2*x4^2\  + \ 2*x1\  - \ 7*x3\  + 
        Exp[x1^2 + x2^4 + x3^2 + x4^2]\), "\n", 
    \(\(vars\  = \ {x1, x2, x3, x4};\)\), "\n", 
    \(\(gradf\  = \ Map[D[f[x1, x2, x3, x4], #] &, \ vars];\)\), "\n", 
    \(\(hessf\  = \ Outer[D, gradf, vars];\)\), "\n", 
    \(\(pt\  = \ N[{2, 3, \(-1\), 6} (*\(,\)\(\ \)\(200\)*) ];\)\), "\n", 
    \(\(hess\  = \ hessf\  /. \ Thread[vars \[Rule] pt];\)\), "\n", 
    \(Do[{pt, \ hess}\  = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ 
          gradf, \ \ hess], \ {20}]\), "\n", 
    \(pt\), "\n", 
    \(Apply[f, \ pt]\), "\n", 
    \(gradf\  /. \ Thread[vars -> \ pt]\)}], "Input"],

Cell[BoxData[
    \({1.8494609332916883`, 2.9243980101871876`, \(-0.9247304666458444`\), 
      5.5483827998750765`}\)], "Output"],

Cell[BoxData[
    \(9.774213747117582`*^46\)], "Output"],

Cell[BoxData[
    \({3.6154052957873067`*^47, 9.778027687347928`*^48, 
      \(-1.8077026478936538`*^47\), 1.0846215887361943`*^48}\)], "Output"]
}, Open  ]],

Cell["\<\
This is not terribly good. We will do 5 more iterations and check for \
improvement.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(Do[{pt, \ hess}\  
        = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ gradf, \ \ hess], \ 
      {5}]\), "\n", 
    \(pt\)}], "Input"],

Cell[BoxData[
    \({1.8117076802222172`, 2.9054221531237197`, \(-0.9058538401111091`\), 
      5.435123040666667`}\)], "Output"]
}, Open  ]],

Cell[TextData[{
  "No significant improvement. It turns out we suffer from ill conditioning \
of the Hessian, so if we attempt to do further iterations ",
  Cell[BoxData[
      \(LinearSolve\)]],
  " will complain.  One option is to up the initial precision and let ",
  Cell[BoxData[
      \(LinearSolve\)]],
  " use high precision arithmetic, but this is sick-dog-slow and also gives \
no improvement anyway (I tried it). In fact, this is a good time to restart \
with an actual Jacobian, that is, do an honest Newton iteration, and see \
where it takes us."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(hess\  = \ hessf\  /. \ Thread[vars \[Rule] pt];\)\), 
  "\[IndentingNewLine]", 
    \(Do[{pt, \ hess}\  
        = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ gradf, \ \ hess], \ 
      {20}]\), "\[IndentingNewLine]", 
    \(pt\), "\[IndentingNewLine]", 
    \(Apply[f, \ pt]\), "\[IndentingNewLine]", 
    \(gradf\  /. \ Thread[vars -> \ pt]\)}], "Input"],

Cell[BoxData[
    \({1.651819055886123`, 2.819507834530066`, \(-0.8259095279430598`\), 
      4.955457167658372`}\)], "Output"],

Cell[BoxData[
    \(3.907502477081578`*^39\)], "Output"],

Cell[BoxData[
    \({1.2908974105131159`*^40, 3.503314853377808`*^41, 
      \(-6.454487052565566`*^39\), 3.8726922315393495`*^40}\)], "Output"]
}, Open  ]],

Cell[TextData[{
  "This has moved us more noticeably, and also appears to be shrinking the \
gradient toward ",
  Cell[BoxData[
      \(0\&\[RightArrow]\)],
    FontWeight->"Bold"],
  ". I will do an outer loop of 15 iterations using an inner loop of 10 \
iterations of Broyden updates. Each time we finish this we restart the next \
step of the outer loop with an actual Jacobian."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(Do[\[IndentingNewLine]hess\  = \ 
        hessf\  /. \ Thread[vars \[Rule] pt]; \[IndentingNewLine]Do[
        {pt, \ hess}\  
          = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ 
            gradf, \ \ hess], \ {10}], \[IndentingNewLine]{15}\ ]\), 
  "\[IndentingNewLine]", 
    \(pt\), "\[IndentingNewLine]", 
    \(Apply[f, \ pt]\), "\[IndentingNewLine]", 
    \(gradf\  /. \ Thread[vars -> \ pt]\)}], "Input"],

Cell[BoxData[
    \({\(-0.2590582254320937`\), 0.09522507526304815`, 1.054290298765001`, 
      \(-0.000014051911739840122`\)}\)], "Output"],

Cell[BoxData[
    \(\(-4.602752531486319`\)\)], "Output"],

Cell[BoxData[
    \({0.06760176824390339`, 0.48568209597363793`, \(-0.12198898471942954`\), 
      \(-0.00009401968252042713`\)}\)], "Output"]
}, Open  ]],

Cell[TextData[{
  "This appears to be near to the critical value found in prior handouts. How \
did I know to use ",
  Cell[BoxData[
      \(10\)]],
  " iterations in the inner loop and ",
  Cell[BoxData[
      \(15\)]],
  " in the outer? Trial-and-error (mostly the latter). What happens if we go \
further? It gets worse. Offhand I am not sure why this is so, but I think it \
has to do with instabilities in the update process near the critical point. \
This is a situation in which one can use Newton's method unadulterated, or \
perhaps switch to high precision. The fact that we get so close and then have \
trouble is an indication that either the problem is quite poorly behaved (I \
made it up more or less at random several weeks ago, so I have no idea how it \
ought to behave), or else I have made a coding mistake somewhere above. My \
guess is it is primarily bad behavior due to use of a root-finder for \
minimization. We are not using symmetric matrix updates so the matrix is not \
really approximating the Hessian, and moreover eigenvectors are not \
orthogonal so a \"bad\" direction may be very close to a \"good\" direction, \
hence the possibility of numeric instabilities."
}], "Text"],

Cell["\<\
Here is an interesting tidbit. We can reevaluate the Hessian at our current \
point (or just use the approximate Hessian we last obtained from the update \
formula), and continue to iterate WITHOUT ALTERING THE HESSIAN. It turns out \
that this will get us quickly to the critical point we found in previous \
hand-outs.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(Do[{pt, \ hesstmp}\  = \[IndentingNewLine]BroydenUpdate[pt, \ vars, \ 
          gradf, \ \ hess], \ {50}]\), "\[IndentingNewLine]", 
    \(pt\), "\[IndentingNewLine]", 
    \(Apply[f, \ pt]\)}], "Input"],

Cell[BoxData[
    \({\(-0.5294917969315399`\), \(-0.4512596813075078`\), 
      0.9879172911363042`, \(-4.115695093642184`*^-6\)}\)], "Output"],

Cell[BoxData[
    \(\(-4.928735748647506`\)\)], "Output"]
}, Open  ]],

Cell[TextData[{
  "Why is this useful? Well, for one thing, we got our minimizer. But notice \
that we expended alot of wasted effort because we calculated updated Hessian \
approximations at every step only to throw them away (I compute but do not \
use ",
  Cell[BoxData[
      \(hesstmp\)]],
  " in each iteration above). So we could write code that does not compute \
the update. There is substantial further efficiency one can gain with little \
coding effort. Now that the matrix is not changing we do not need to use the \
O(",
  Cell[BoxData[
      \(n\^3\)]],
  ") ",
  Cell[BoxData[
      \(LinearSolve\)]],
  " algorithm per iteration (where n is the number of variables). Instead we \
can do a single LU decomposition, which is O(",
  Cell[BoxData[
      \(n\^3\)]],
  "), and in each subsequent iteration just do a back-solve call (using ",
  Cell[BoxData[
      \(LUBackSubstitution\)]],
  ", as per a previous hand-out). This is only O(",
  Cell[BoxData[
      \(n\^2\)]],
  "), so for a high dimensional problem this method cuts substantially the \
computational expense. In practice there are hybrid codes that will do this, \
with interspersed rank-1 updates or computation of full Jacobians every so \
often."
}], "Text"],

Cell["\<\
I have a few other remarks. First, recall that one reason for Broyden's \
method is to cut the cost of evaluating a matrix at each step. This matrix is \
formed from derivatives of some function. If the function evaluations are \
cheap but the derivative evaluations are not, one can instead use \
approximated derivatives via numerical differentiation. This is where we \
approximate a derivative with a difference quotient. I believe this tactic is \
not often used, if only because it is not common to have expensive derivative \
evaluations but cheap function evaluations.\
\>", "Text"],

Cell[TextData[{
  "There is another efficiency with espect to Broyden's method that I should \
mention. One really does not require the full expense of ",
  Cell[BoxData[
      \(LinearSolve\)]],
  ". There is something called a QR decomposition of a matrix that can be \
used to advantage. We can write our matrix ",
  Cell[BoxData[
      \(H\)]],
  " as ",
  Cell[BoxData[
      \(Q . R\)]],
  ". In essence this is Gram-Schmidt orthonormalization (this gives the \
orthogonal matrix ",
  Cell[BoxData[
      \(Q\)]],
  ") with ",
  Cell[BoxData[
      \(R\)]],
  " upper triangular. To solve H",
  Cell[BoxData[
      \(x\&\[RightArrow]\)],
    FontWeight->"Bold"],
  " = ",
  Cell[BoxData[
      StyleBox[\(b\&\[RightArrow]\),
        FontWeight->"Bold"]]],
  " for ",
  Cell[BoxData[
      \(x\&\[RightArrow]\)],
    FontWeight->"Bold"],
  " one writes R",
  Cell[BoxData[
      \(x\&\[RightArrow]\)],
    FontWeight->"Bold"],
  " = Transpose[Q]",
  Cell[BoxData[
      StyleBox[\(b\&\[RightArrow]\),
        FontWeight->"Bold"]]],
  " (using the fact that ",
  Cell[BoxData[
      \(Q\)]],
  " is orthogonal so its inverse is its transpose). Since R is upper \
triangular we only need to do back-substitution, and this is O(",
  Cell[BoxData[
      \(n\^2\)]],
  "). So the entire process is only of complexity O(",
  Cell[BoxData[
      \(n\^2\)]],
  ") provided we have a fast way to obtain the QR decomposition. As you may \
recall, Gram-Schmidt is O(",
  Cell[BoxData[
      \(n\^3\)]],
  ") complexity. But if we do it once, and then change our matrix with a \
rank-1 update, it is also possible to update ",
  Cell[BoxData[
      \(Q\)]],
  " and ",
  Cell[BoxData[
      \(R\)]],
  " with O(",
  Cell[BoxData[
      \(n\^2\)]],
  ") operations. For further information on this you might look at \
\"Numerical Recipes in C\" by Press et al, or \"Matrix Computations\" by \
Golub and Van Loan. This latter contains a wealth of information on numerical \
linear algebra. \"Numerical Recipes\" contains brief descriptions (and code) \
for countless algorithms from linear algebra, linear and nonlinear \
programming, numerical integration, function approximation, and more."
}], "Text"],

Cell[TextData[{
  "It is worthy of note that ",
  Cell[BoxData[
      \(FindMinimum\)]],
  " will handle this example just fine."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(FindMinimum[Evaluate[f[x1, x2, x3, x4]], \[IndentingNewLine]\ 
      {x1, Random[]}, {x2, Random[]}, {x3, Random[]}, {x4, Random[]}]\)], 
  "Input"],

Cell[BoxData[
    \({\(-4.928757857807448`\), 
      {x1 \[Rule] \(-0.5303644096048592`\), 
        x2 \[Rule] \(-0.4535766948347582`\), x3 \[Rule] 0.9875862876975207`, 
        x4 \[Rule] \(-0.00022175717226036737`\)}}\)], "Output"]
}, Open  ]],

Cell[TextData[{
  "If we use the ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " implementation of the BFGS algorithm we get even closer. The way to \
access this is via the ",
  Cell[BoxData[
      \(QuasiNewton\)]],
  " method option to ",
  Cell[BoxData[
      \(FindMinimum\)]],
  ". I should mention that this code uses step-length computations (what we \
call line searching in class) that conform to the Wolfe's theorem criteria \
and also uses QR updates so it is far better optimized than the code \
fragments shown above or in other handouts from class."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(FindMinimum[Evaluate[f[x1, x2, x3, x4]], \[IndentingNewLine]\ 
      {x1, Random[]}, {x2, Random[]}, {x3, Random[]}, {x4, Random[]}, \ 
      Method \[Rule] QuasiNewton]\)], "Input"],

Cell[BoxData[
    \({\(-4.928758847368604`\), 
      {x1 \[Rule] \(-0.5309270870974285`\), 
        x2 \[Rule] \(-0.45372132866995235`\), x3 \[Rule] 0.9874185345200754`, 
        x4 \[Rule] \(-6.152579825376954`*^-8\)}}\)], "Output"]
}, Open  ]]
}, Open  ]]
}, Open  ]]
},
FrontEndVersion->"4.0 for Microsoft Windows",
ScreenRectangle->{{0, 640}, {0, 407}},
WindowSize->{496, 295},
WindowMargins->{{52, Automatic}, {33, Automatic}},
PrintingCopies->1,
PrintingPageRange->{Automatic, Automatic},
PrintingOptions->{"PrintingMargins"->{{36, 36}, {36, 50.375}}}
]


(***********************************************************************
Cached data follows.  If you edit this Notebook file directly, not using
Mathematica, you must remove the line containing CacheID at the top of 
the file.  The cache data will then be recreated when you save this file 
from within Mathematica.
***********************************************************************)

(*CellTagsOutline
CellTagsIndex->{}
*)

(*CellTagsIndex
CellTagsIndex->{}
*)

(*NotebookFileOutline
Notebook[{

Cell[CellGroupData[{
Cell[1739, 51, 28, 0, 64, "Subtitle"],

Cell[CellGroupData[{
Cell[1792, 55, 79, 1, 43, "Subsubsection"],
Cell[1874, 58, 1645, 59, 148, "Text"],
Cell[3522, 119, 941, 18, 310, "Input",
  PageBreakAbove->False],
Cell[4466, 139, 270, 5, 71, "Text",
  PageBreakAbove->False],

Cell[CellGroupData[{
Cell[4761, 148, 466, 9, 130, "Input",
  PageBreakAbove->False],
Cell[5230, 159, 89, 2, 29, "Output",
  PageBreakAbove->False],
Cell[5322, 163, 81, 2, 29, "Output",
  PageBreakAbove->False]
}, Open  ]],

Cell[CellGroupData[{
Cell[5440, 170, 173, 4, 50, "Input",
  PageBreakAbove->False],
Cell[5616, 176, 243, 5, 25, "Print"],
Cell[5862, 183, 403, 8, 44, "Print"],
Cell[6268, 193, 253, 6, 25, "Print"],
Cell[6524, 201, 405, 8, 44, "Print"],
Cell[6932, 211, 284, 7, 25, "Print",
  PageBreakBelow->False],
Cell[7219, 220, 401, 8, 44, "Print"]
}, Open  ]],
Cell[7635, 231, 392, 10, 72, "Text",
  PageBreakAbove->False,
  PageBreakBelow->Automatic],

Cell[CellGroupData[{
Cell[8052, 245, 310, 6, 110, "Input"],
Cell[8365, 253, 183, 3, 48, "Output"],
Cell[8551, 258, 58, 1, 29, "Output"],
Cell[8612, 261, 97, 2, 29, "Output"]
}, Open  ]],
Cell[8724, 266, 1288, 26, 299, "Text"],

Cell[CellGroupData[{
Cell[10037, 296, 603, 12, 170, "Input"],
Cell[10643, 310, 167, 3, 25, "Print"],
Cell[10813, 315, 398, 8, 44, "Print"],
Cell[11214, 325, 257, 6, 25, "Print"],
Cell[11474, 333, 406, 8, 44, "Print"],
Cell[11883, 343, 261, 6, 25, "Print"],
Cell[12147, 351, 406, 8, 44, "Print"]
}, Open  ]],

Cell[CellGroupData[{
Cell[12590, 364, 335, 7, 110, "Input",
  PageBreakAbove->False],
Cell[12928, 373, 188, 3, 48, "Output"],
Cell[13119, 378, 59, 1, 29, "Output"],
Cell[13181, 381, 85, 1, 29, "Output"]
}, Open  ]],
Cell[13281, 385, 85, 1, 33, "Text",
  PageBreakBelow->True],
Cell[13369, 388, 315, 5, 90, "Text"],

Cell[CellGroupData[{
Cell[13709, 397, 711, 13, 270, "Input"],
Cell[14423, 412, 130, 2, 29, "Output"],
Cell[14556, 416, 56, 1, 29, "Output"],
Cell[14615, 419, 145, 2, 48, "Output"]
}, Open  ]],
Cell[14775, 424, 108, 3, 33, "Text"],

Cell[CellGroupData[{
Cell[14908, 431, 161, 4, 70, "Input"],
Cell[15072, 437, 129, 2, 29, "Output"]
}, Open  ]],
Cell[15216, 442, 572, 12, 128, "Text"],

Cell[CellGroupData[{
Cell[15813, 458, 383, 8, 130, "Input"],
Cell[16199, 468, 127, 2, 29, "Output"],
Cell[16329, 472, 56, 1, 29, "Output"],
Cell[16388, 475, 144, 2, 48, "Output"]
}, Open  ]],
Cell[16547, 480, 394, 9, 92, "Text"],

Cell[CellGroupData[{
Cell[16966, 493, 441, 9, 170, "Input"],
Cell[17410, 504, 140, 2, 29, "Output"],
Cell[17553, 508, 57, 1, 29, "Output"],
Cell[17613, 511, 142, 2, 29, "Output"]
}, Open  ]],
Cell[17770, 516, 1209, 21, 261, "Text"],
Cell[18982, 539, 344, 6, 90, "Text"],

Cell[CellGroupData[{
Cell[19351, 549, 225, 4, 90, "Input"],
Cell[19579, 555, 143, 2, 29, "Output"],
Cell[19725, 559, 57, 1, 29, "Output"]
}, Open  ]],
Cell[19797, 563, 1240, 30, 242, "Text"],
Cell[21040, 595, 600, 9, 147, "Text"],
Cell[21643, 606, 2195, 69, 301, "Text"],
Cell[23841, 677, 141, 5, 33, "Text"],

Cell[CellGroupData[{
Cell[24007, 686, 168, 3, 70, "Input"],
Cell[24178, 691, 233, 4, 48, "Output"]
}, Open  ]],
Cell[24426, 698, 587, 15, 109, "Text"],

Cell[CellGroupData[{
Cell[25038, 717, 202, 3, 70, "Input"],
Cell[25243, 722, 233, 4, 48, "Output"]
}, Open  ]]
}, Open  ]]
}, Open  ]]
}
]
*)




(***********************************************************************
End of Mathematica Notebook file.
***********************************************************************)

