NesterovSmooth #
Main results #
This file mainly concentrates on the Nesterov algorithm for smooth convex optimization problems.
We prove the O(1 / k ^ 2) rate for this algorithm.
class
Nesterov
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(f' : E → E)
(γ : ℕ+ → ℝ)
(initial_point : E)
:
Type u_1
- x : ℕ → E
- y : ℕ+ → E
- v : ℕ → E
- l : NNReal
- diff : ∀ (x₁ : E), HasGradientAt f (f' x₁) x₁
- update1 : ∀ (k : ℕ+), Nesterov.y f f' γ initial_point k = (1 - γ k) • Nesterov.x f f' γ initial_point (↑k - 1) + γ k • Nesterov.v f f' γ initial_point (↑k - 1)
- update2 : ∀ (k : ℕ+), Nesterov.x f f' γ initial_point ↑k = Nesterov.y f f' γ initial_point k - (1 / ↑(Nesterov.l f f' γ initial_point)) • f' (Nesterov.y f f' γ initial_point k)
- update3 : ∀ (k : ℕ+), Nesterov.v f f' γ initial_point ↑k = Nesterov.x f f' γ initial_point (↑k - 1) + (1 / γ k) • (Nesterov.x f f' γ initial_point ↑k - Nesterov.x f f' γ initial_point (↑k - 1))
- hl : Nesterov.l f f' γ initial_point > 0
- smooth : LipschitzWith (Nesterov.l f f' γ initial_point) f'
- initial1 : γ 1 = 1
- initial2 : Nesterov.v f f' γ initial_point 0 = initial_point
Instances
theorem
Nesterov.diff
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
(γ : ℕ+ → ℝ)
(initial_point : E)
[self : Nesterov f f' γ initial_point]
(x₁ : E)
:
HasGradientAt f (f' x₁) x₁
theorem
Nesterov.update1
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{γ : ℕ+ → ℝ}
{initial_point : E}
[self : Nesterov f f' γ initial_point]
(k : ℕ+)
:
Nesterov.y f f' γ initial_point k = (1 - γ k) • Nesterov.x f f' γ initial_point (↑k - 1) + γ k • Nesterov.v f f' γ initial_point (↑k - 1)
theorem
Nesterov.update2
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{γ : ℕ+ → ℝ}
{initial_point : E}
[self : Nesterov f f' γ initial_point]
(k : ℕ+)
:
Nesterov.x f f' γ initial_point ↑k = Nesterov.y f f' γ initial_point k - (1 / ↑(Nesterov.l f f' γ initial_point)) • f' (Nesterov.y f f' γ initial_point k)
theorem
Nesterov.update3
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{γ : ℕ+ → ℝ}
{initial_point : E}
[self : Nesterov f f' γ initial_point]
(k : ℕ+)
:
Nesterov.v f f' γ initial_point ↑k = Nesterov.x f f' γ initial_point (↑k - 1) + (1 / γ k) • (Nesterov.x f f' γ initial_point ↑k - Nesterov.x f f' γ initial_point (↑k - 1))
theorem
Nesterov.hl
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{γ : ℕ+ → ℝ}
{initial_point : E}
[self : Nesterov f f' γ initial_point]
:
Nesterov.l f f' γ initial_point > 0
theorem
Nesterov.smooth
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{γ : ℕ+ → ℝ}
{initial_point : E}
[self : Nesterov f f' γ initial_point]
:
LipschitzWith (Nesterov.l f f' γ initial_point) f'
theorem
Nesterov.initial1
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(f' : E → E)
{γ : ℕ+ → ℝ}
(initial_point : E)
[self : Nesterov f f' γ initial_point]
:
γ 1 = 1
theorem
Nesterov.initial2
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{γ : ℕ+ → ℝ}
{initial_point : E}
[self : Nesterov f f' γ initial_point]
:
Nesterov.v f f' γ initial_point 0 = initial_point
theorem
nesterov_algorithm_smooth
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{f' : E → E}
{xm : E}
{x₀ : E}
{γ : ℕ+ → ℝ}
{alg : Nesterov f f' γ x₀}
(hfun : ConvexOn ℝ Set.univ f)
(hg : ∀ (k : ℕ+), γ k = 2 / (↑↑k + 1))
(min : IsMinOn f Set.univ xm)
(con : ∀ (k : ℕ+), (1 - γ k) / γ k ^ 2 ≤ 1 / γ (k - 1) ^ 2)
(k : ℕ+)
: