NesterovAccelerationFirst #
Main results #
This file mainly concentrates on the first version of Nesterov algorithm for composite optimization problems.
We prove the O(1 / k ^ 2) rate for this algorithm.
class
Nesterov_first
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(h : E → ℝ)
(f' : E → E)
(x0 : E)
:
Type u_1
- l : NNReal
- x : ℕ → E
- y : ℕ → E
- hl : ↑(Nesterov_first.l f h f' x0) > 0
- h₁ : ∀ (x : E), HasGradientAt f (f' x) x
- h₂ : LipschitzWith (Nesterov_first.l f h f' x0) f'
- oriy : Nesterov_first.y f h f' x0 0 = Nesterov_first.x f h f' x0 0
- oriγ : Nesterov_first.γ f h f' x0 0 = 1
- initial : Nesterov_first.x f h f' x0 0 = x0
- cond : ∀ (n : ℕ+), (1 - Nesterov_first.γ f h f' x0 ↑n) * Nesterov_first.t f h f' x0 ↑n / Nesterov_first.γ f h f' x0 ↑n ^ 2 ≤ Nesterov_first.t f h f' x0 (↑n - 1) / Nesterov_first.γ f h f' x0 (↑n - 1) ^ 2
- tbound : ∀ (k : ℕ), 0 < Nesterov_first.t f h f' x0 k ∧ Nesterov_first.t f h f' x0 k ≤ 1 / ↑(Nesterov_first.l f h f' x0)
- γbound : ∀ (n : ℕ), 0 < Nesterov_first.γ f h f' x0 n ∧ Nesterov_first.γ f h f' x0 n ≤ 1
- update1 : ∀ (k : ℕ+), Nesterov_first.y f h f' x0 ↑k = Nesterov_first.x f h f' x0 ↑k + (Nesterov_first.γ f h f' x0 ↑k * (1 - Nesterov_first.γ f h f' x0 (↑k - 1)) / Nesterov_first.γ f h f' x0 (↑k - 1)) • (Nesterov_first.x f h f' x0 ↑k - Nesterov_first.x f h f' x0 (↑k - 1))
- update2 : ∀ (k : ℕ), prox_prop (Nesterov_first.t f h f' x0 k • h) (Nesterov_first.y f h f' x0 k - Nesterov_first.t f h f' x0 k • f' (Nesterov_first.y f h f' x0 k)) (Nesterov_first.x f h f' x0 (k + 1))
Instances
theorem
Nesterov_first.hl
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
:
↑(Nesterov_first.l f h f' x0) > 0
theorem
Nesterov_first.h₁
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
{f' : E → E}
(x0 : E)
[self : Nesterov_first f h f' x0]
(x : E)
:
HasGradientAt f (f' x) x
theorem
Nesterov_first.convf
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
(f' : E → E)
(x0 : E)
[self : Nesterov_first f h f' x0]
:
theorem
Nesterov_first.h₂
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
:
LipschitzWith (Nesterov_first.l f h f' x0) f'
theorem
Nesterov_first.convh
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
{h : E → ℝ}
(f' : E → E)
(x0 : E)
[self : Nesterov_first f h f' x0]
:
theorem
Nesterov_first.oriy
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
:
Nesterov_first.y f h f' x0 0 = Nesterov_first.x f h f' x0 0
theorem
Nesterov_first.oriγ
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
:
Nesterov_first.γ f h f' x0 0 = 1
theorem
Nesterov_first.initial
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
:
Nesterov_first.x f h f' x0 0 = x0
theorem
Nesterov_first.cond
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
(n : ℕ+)
:
(1 - Nesterov_first.γ f h f' x0 ↑n) * Nesterov_first.t f h f' x0 ↑n / Nesterov_first.γ f h f' x0 ↑n ^ 2 ≤ Nesterov_first.t f h f' x0 (↑n - 1) / Nesterov_first.γ f h f' x0 (↑n - 1) ^ 2
theorem
Nesterov_first.tbound
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
(k : ℕ)
:
0 < Nesterov_first.t f h f' x0 k ∧ Nesterov_first.t f h f' x0 k ≤ 1 / ↑(Nesterov_first.l f h f' x0)
theorem
Nesterov_first.γbound
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
(n : ℕ)
:
0 < Nesterov_first.γ f h f' x0 n ∧ Nesterov_first.γ f h f' x0 n ≤ 1
theorem
Nesterov_first.update1
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
(k : ℕ+)
:
Nesterov_first.y f h f' x0 ↑k = Nesterov_first.x f h f' x0 ↑k + (Nesterov_first.γ f h f' x0 ↑k * (1 - Nesterov_first.γ f h f' x0 (↑k - 1)) / Nesterov_first.γ f h f' x0 (↑k - 1)) • (Nesterov_first.x f h f' x0 ↑k - Nesterov_first.x f h f' x0 (↑k - 1))
theorem
Nesterov_first.update2
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first f h f' x0]
(k : ℕ)
:
prox_prop (Nesterov_first.t f h f' x0 k • h)
(Nesterov_first.y f h f' x0 k - Nesterov_first.t f h f' x0 k • f' (Nesterov_first.y f h f' x0 k))
(Nesterov_first.x f h f' x0 (k + 1))
theorem
Nesterov_first_converge
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
{alg : Nesterov_first f h f' x0}
{xm : E}
(minφ : IsMinOn (f + h) Set.univ xm)
(k : ℕ)
:
f (Nesterov_first.x f h f' x0 (k + 1)) + h (Nesterov_first.x f h f' x0 (k + 1)) - f xm - h xm ≤ Nesterov_first.γ f h f' x0 k ^ 2 / (2 * Nesterov_first.t f h f' x0 k) * ‖x0 - xm‖ ^ 2
class
Nesterov_first_fix_stepsize
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(h : E → ℝ)
(f' : E → E)
(x0 : E)
:
Type u_1
- l : NNReal
- hl : ↑(Nesterov_first_fix_stepsize.l f h f' x0) > 0
- h₁ : ∀ (x : E), HasGradientAt f (f' x) x
- h₂ : LipschitzWith (Nesterov_first_fix_stepsize.l f h f' x0) f'
- x : ℕ → E
- y : ℕ → E
- oriy : Nesterov_first_fix_stepsize.y f h f' x0 0 = Nesterov_first_fix_stepsize.x f h f' x0 0
- initial : Nesterov_first_fix_stepsize.x f h f' x0 0 = x0
- teq : ∀ (n : ℕ), Nesterov_first_fix_stepsize.t f h f' x0 n = 1 / ↑(Nesterov_first_fix_stepsize.l f h f' x0)
- γeq : ∀ (n : ℕ), Nesterov_first_fix_stepsize.γ f h f' x0 n = 2 / (2 + ↑n)
- update1 : ∀ (k : ℕ+), Nesterov_first_fix_stepsize.y f h f' x0 ↑k = Nesterov_first_fix_stepsize.x f h f' x0 ↑k + (Nesterov_first_fix_stepsize.γ f h f' x0 ↑k * (1 - Nesterov_first_fix_stepsize.γ f h f' x0 (↑k - 1)) / Nesterov_first_fix_stepsize.γ f h f' x0 (↑k - 1)) • (Nesterov_first_fix_stepsize.x f h f' x0 ↑k - Nesterov_first_fix_stepsize.x f h f' x0 (↑k - 1))
- update2 : ∀ (k : ℕ), prox_prop (Nesterov_first_fix_stepsize.t f h f' x0 k • h) (Nesterov_first_fix_stepsize.y f h f' x0 k - Nesterov_first_fix_stepsize.t f h f' x0 k • f' (Nesterov_first_fix_stepsize.y f h f' x0 k)) (Nesterov_first_fix_stepsize.x f h f' x0 (k + 1))
Instances
theorem
Nesterov_first_fix_stepsize.hl
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first_fix_stepsize f h f' x0]
:
↑(Nesterov_first_fix_stepsize.l f h f' x0) > 0
theorem
Nesterov_first_fix_stepsize.h₁
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
{f' : E → E}
(x0 : E)
[self : Nesterov_first_fix_stepsize f h f' x0]
(x : E)
:
HasGradientAt f (f' x) x
theorem
Nesterov_first_fix_stepsize.convf
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
(f' : E → E)
(x0 : E)
[self : Nesterov_first_fix_stepsize f h f' x0]
:
theorem
Nesterov_first_fix_stepsize.h₂
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first_fix_stepsize f h f' x0]
:
LipschitzWith (Nesterov_first_fix_stepsize.l f h f' x0) f'
theorem
Nesterov_first_fix_stepsize.convh
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
{h : E → ℝ}
(f' : E → E)
(x0 : E)
[self : Nesterov_first_fix_stepsize f h f' x0]
:
theorem
Nesterov_first_fix_stepsize.oriy
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first_fix_stepsize f h f' x0]
:
Nesterov_first_fix_stepsize.y f h f' x0 0 = Nesterov_first_fix_stepsize.x f h f' x0 0
theorem
Nesterov_first_fix_stepsize.initial
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first_fix_stepsize f h f' x0]
:
Nesterov_first_fix_stepsize.x f h f' x0 0 = x0
theorem
Nesterov_first_fix_stepsize.teq
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first_fix_stepsize f h f' x0]
(n : ℕ)
:
Nesterov_first_fix_stepsize.t f h f' x0 n = 1 / ↑(Nesterov_first_fix_stepsize.l f h f' x0)
theorem
Nesterov_first_fix_stepsize.γeq
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first_fix_stepsize f h f' x0]
(n : ℕ)
:
Nesterov_first_fix_stepsize.γ f h f' x0 n = 2 / (2 + ↑n)
theorem
Nesterov_first_fix_stepsize.update1
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first_fix_stepsize f h f' x0]
(k : ℕ+)
:
Nesterov_first_fix_stepsize.y f h f' x0 ↑k = Nesterov_first_fix_stepsize.x f h f' x0 ↑k + (Nesterov_first_fix_stepsize.γ f h f' x0 ↑k * (1 - Nesterov_first_fix_stepsize.γ f h f' x0 (↑k - 1)) / Nesterov_first_fix_stepsize.γ f h f' x0 (↑k - 1)) • (Nesterov_first_fix_stepsize.x f h f' x0 ↑k - Nesterov_first_fix_stepsize.x f h f' x0 (↑k - 1))
theorem
Nesterov_first_fix_stepsize.update2
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_first_fix_stepsize f h f' x0]
(k : ℕ)
:
prox_prop (Nesterov_first_fix_stepsize.t f h f' x0 k • h)
(Nesterov_first_fix_stepsize.y f h f' x0 k - Nesterov_first_fix_stepsize.t f h f' x0 k • f' (Nesterov_first_fix_stepsize.y f h f' x0 k))
(Nesterov_first_fix_stepsize.x f h f' x0 (k + 1))
instance
instNesterov_firstOfNesterov_first_fix_stepsize
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[p : Nesterov_first_fix_stepsize f h f' x0]
:
Nesterov_first f h f' x0
Equations
- One or more equations did not get rendered due to their size.
theorem
Nesterov_first_fix_stepsize_converge
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
{alg : Nesterov_first_fix_stepsize f h f' x0}
{xm : E}
(minφ : IsMinOn (f + h) Set.univ xm)
(k : ℕ)
: