# Gradient Descent

## Main results

This file develops the gradient descent algorithm for smooth convex optimization problems.
We prove the O(1 / k) convergence rate for this algorithm with a fixed step size.
class
GradientDescent
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(f' : E → E)
(initial_x : E)
:
Type u_1
- x : ℕ → E
- l : NNReal
- diff : ∀ (x₁ : E), HasGradientAt f (f' x₁) x₁
- smooth : LipschitzWith (GradientDescent.l f f' initial_x) f'
- update : ∀ (k : ℕ), GradientDescent.x f f' initial_x (k + 1) = GradientDescent.x f f' initial_x k - GradientDescent.a f f' initial_x k • f' (GradientDescent.x f f' initial_x k)
- hl : GradientDescent.l f f' initial_x > 0
- step₁ : ∀ (k : ℕ), GradientDescent.a f f' initial_x k > 0
- initial : GradientDescent.x f f' initial_x 0 = initial_x
Instances
theorem
GradientDescent.diff
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
(initial_x : E)
[self : GradientDescent f f' initial_x]
(x₁ : E)
:
HasGradientAt f (f' x₁) x₁
theorem
GradientDescent.smooth
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : GradientDescent f f' initial_x]
:
LipschitzWith (GradientDescent.l f f' initial_x) f'
theorem
GradientDescent.update
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : GradientDescent f f' initial_x]
(k : ℕ)
:
GradientDescent.x f f' initial_x (k + 1) = GradientDescent.x f f' initial_x k - GradientDescent.a f f' initial_x k • f' (GradientDescent.x f f' initial_x k)
theorem
GradientDescent.hl
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : GradientDescent f f' initial_x]
:
GradientDescent.l f f' initial_x > 0
theorem
GradientDescent.step₁
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : GradientDescent f f' initial_x]
(k : ℕ)
:
GradientDescent.a f f' initial_x k > 0
theorem
GradientDescent.initial
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : GradientDescent f f' initial_x]
:
GradientDescent.x f f' initial_x 0 = initial_x
class
Gradient_Descent_fix_stepsize
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(f' : E → E)
(initial_x : E)
:
Type u_1
- x : ℕ → E
- a : ℝ
- l : NNReal
- diff : ∀ (x₁ : E), HasGradientAt f (f' x₁) x₁
- smooth : LipschitzWith (Gradient_Descent_fix_stepsize.l f f' initial_x) f'
- update : ∀ (k : ℕ), Gradient_Descent_fix_stepsize.x f f' initial_x (k + 1) = Gradient_Descent_fix_stepsize.x f f' initial_x k - Gradient_Descent_fix_stepsize.a f f' initial_x • f' (Gradient_Descent_fix_stepsize.x f f' initial_x k)
- hl : ↑(Gradient_Descent_fix_stepsize.l f f' initial_x) > 0
- step₁ : Gradient_Descent_fix_stepsize.a f f' initial_x > 0
- initial : Gradient_Descent_fix_stepsize.x f f' initial_x 0 = initial_x
Instances
theorem
Gradient_Descent_fix_stepsize.diff
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
(initial_x : E)
[self : Gradient_Descent_fix_stepsize f f' initial_x]
(x₁ : E)
:
HasGradientAt f (f' x₁) x₁
theorem
Gradient_Descent_fix_stepsize.smooth
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : Gradient_Descent_fix_stepsize f f' initial_x]
:
LipschitzWith (Gradient_Descent_fix_stepsize.l f f' initial_x) f'
theorem
Gradient_Descent_fix_stepsize.update
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : Gradient_Descent_fix_stepsize f f' initial_x]
(k : ℕ)
:
Gradient_Descent_fix_stepsize.x f f' initial_x (k + 1) = Gradient_Descent_fix_stepsize.x f f' initial_x k - Gradient_Descent_fix_stepsize.a f f' initial_x • f' (Gradient_Descent_fix_stepsize.x f f' initial_x k)
theorem
Gradient_Descent_fix_stepsize.hl
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : Gradient_Descent_fix_stepsize f f' initial_x]
:
↑(Gradient_Descent_fix_stepsize.l f f' initial_x) > 0
theorem
Gradient_Descent_fix_stepsize.step₁
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : Gradient_Descent_fix_stepsize f f' initial_x]
:
Gradient_Descent_fix_stepsize.a f f' initial_x > 0
theorem
Gradient_Descent_fix_stepsize.initial
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{initial_x : E}
[self : Gradient_Descent_fix_stepsize f f' initial_x]
:
Gradient_Descent_fix_stepsize.x f f' initial_x 0 = initial_x
instance
instGradientDescentOfGradient_Descent_fix_stepsize
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{x₀ : E}
[p : Gradient_Descent_fix_stepsize f f' x₀]
:
GradientDescent f f' x₀
Equations
- One or more equations did not get rendered due to their size.
theorem
convex_function
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
(h₁ : ∀ (x₁ : E), HasGradientAt f (f' x₁) x₁)
(hfun : ConvexOn ℝ Set.univ f)
(x : E)
(y : E)
:
f x + inner (f' x) (y - x) ≤ f y
theorem
convex_lipschitz
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{l : NNReal}
{a : ℝ}
(h₁ : ∀ (x₁ : E), HasGradientAt f (f' x₁) x₁)
(h₂ : ↑l > 0)
(ha₁ : ↑l ≤ 1 / a)
(ha₂ : a > 0)
(h₃ : LipschitzWith l f')
(x : E)
:
f (x - a • f' x) ≤ f x - a / 2 * ‖f' x‖ ^ 2
theorem
point_descent_for_convex
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{xm : E}
{x₀ : E}
{alg : Gradient_Descent_fix_stepsize f f' x₀}
(hfun : ConvexOn ℝ Set.univ f)
(step₂ : Gradient_Descent_fix_stepsize.a f f' x₀ ≤ 1 / ↑(Gradient_Descent_fix_stepsize.l f f' x₀))
(k : ℕ)
:
f (Gradient_Descent_fix_stepsize.x f f' x₀ (k + 1)) ≤ f xm + 1 / (2 * Gradient_Descent_fix_stepsize.a f f' x₀) * (‖Gradient_Descent_fix_stepsize.x f f' x₀ k - xm‖ ^ 2 - ‖Gradient_Descent_fix_stepsize.x f f' x₀ (k + 1) - xm‖ ^ 2)
theorem
gradient_method
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{xm : E}
{x₀ : E}
{alg : Gradient_Descent_fix_stepsize f f' x₀}
(hfun : ConvexOn ℝ Set.univ f)
(step₂ : Gradient_Descent_fix_stepsize.a f f' x₀ ≤ 1 / ↑(Gradient_Descent_fix_stepsize.l f f' x₀))
(k : ℕ)
: