NesterovAccelerationSecond #
Main results #
This file mainly concentrates on the second version of Nesterov algorithm for composite optimization problems.
We prove the O(1 / k ^ 2) rate for this algorithm.
class
Nesterov_second
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(h : E → ℝ)
(f' : E → E)
(x0 : E)
:
Type u_1
- l : NNReal
- hl : ↑(Nesterov_second.l f h f' x0) > 0
- x : ℕ → E
- y : ℕ → E
- z : ℕ+ → E
- h₁ : ∀ (x : E), HasGradientAt f (f' x) x
- h₂ : LipschitzWith (Nesterov_second.l f h f' x0) f'
- oriy : Nesterov_second.y f h f' x0 0 = Nesterov_second.x f h f' x0 0
- oriγ : Nesterov_second.γ f h f' x0 1 = 1
- initial : x0 = Nesterov_second.x f h f' x0 0
- cond : ∀ (n : ℕ+), (1 - Nesterov_second.γ f h f' x0 (↑n + 1)) * Nesterov_second.t f h f' x0 (↑n + 1) / Nesterov_second.γ f h f' x0 (↑n + 1) ^ 2 ≤ Nesterov_second.t f h f' x0 ↑n / Nesterov_second.γ f h f' x0 ↑n ^ 2
- tbound : ∀ (k : ℕ), 0 < Nesterov_second.t f h f' x0 k ∧ Nesterov_second.t f h f' x0 k ≤ 1 / ↑(Nesterov_second.l f h f' x0)
- γbound : ∀ (n : ℕ), 0 < Nesterov_second.γ f h f' x0 n ∧ Nesterov_second.γ f h f' x0 n ≤ 1
- update1 : ∀ (k : ℕ+), Nesterov_second.z f h f' x0 k = (1 - Nesterov_second.γ f h f' x0 ↑k) • Nesterov_second.x f h f' x0 (↑k - 1) + Nesterov_second.γ f h f' x0 ↑k • Nesterov_second.y f h f' x0 (↑k - 1)
- update2 : ∀ (k : ℕ+), prox_prop ((Nesterov_second.t f h f' x0 ↑k / Nesterov_second.γ f h f' x0 ↑k) • h) (Nesterov_second.y f h f' x0 (↑k - 1) - (Nesterov_second.t f h f' x0 ↑k / Nesterov_second.γ f h f' x0 ↑k) • f' (Nesterov_second.z f h f' x0 k)) (Nesterov_second.y f h f' x0 ↑k)
- update3 : ∀ (k : ℕ+), Nesterov_second.x f h f' x0 ↑k = (1 - Nesterov_second.γ f h f' x0 ↑k) • Nesterov_second.x f h f' x0 (↑k - 1) + Nesterov_second.γ f h f' x0 ↑k • Nesterov_second.y f h f' x0 ↑k
Instances
theorem
Nesterov_second.hl
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
:
↑(Nesterov_second.l f h f' x0) > 0
theorem
Nesterov_second.h₁
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
{f' : E → E}
(x0 : E)
[self : Nesterov_second f h f' x0]
(x : E)
:
HasGradientAt f (f' x) x
theorem
Nesterov_second.convf
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
(f' : E → E)
(x0 : E)
[self : Nesterov_second f h f' x0]
:
theorem
Nesterov_second.h₂
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
:
LipschitzWith (Nesterov_second.l f h f' x0) f'
theorem
Nesterov_second.convh
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
{h : E → ℝ}
(f' : E → E)
(x0 : E)
[self : Nesterov_second f h f' x0]
:
theorem
Nesterov_second.oriy
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
:
Nesterov_second.y f h f' x0 0 = Nesterov_second.x f h f' x0 0
theorem
Nesterov_second.oriγ
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
:
Nesterov_second.γ f h f' x0 1 = 1
theorem
Nesterov_second.initial
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
:
x0 = Nesterov_second.x f h f' x0 0
theorem
Nesterov_second.cond
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
(n : ℕ+)
:
(1 - Nesterov_second.γ f h f' x0 (↑n + 1)) * Nesterov_second.t f h f' x0 (↑n + 1) / Nesterov_second.γ f h f' x0 (↑n + 1) ^ 2 ≤ Nesterov_second.t f h f' x0 ↑n / Nesterov_second.γ f h f' x0 ↑n ^ 2
theorem
Nesterov_second.tbound
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
(k : ℕ)
:
0 < Nesterov_second.t f h f' x0 k ∧ Nesterov_second.t f h f' x0 k ≤ 1 / ↑(Nesterov_second.l f h f' x0)
theorem
Nesterov_second.γbound
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
(n : ℕ)
:
0 < Nesterov_second.γ f h f' x0 n ∧ Nesterov_second.γ f h f' x0 n ≤ 1
theorem
Nesterov_second.update1
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
(k : ℕ+)
:
Nesterov_second.z f h f' x0 k = (1 - Nesterov_second.γ f h f' x0 ↑k) • Nesterov_second.x f h f' x0 (↑k - 1) + Nesterov_second.γ f h f' x0 ↑k • Nesterov_second.y f h f' x0 (↑k - 1)
theorem
Nesterov_second.update2
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
(k : ℕ+)
:
prox_prop ((Nesterov_second.t f h f' x0 ↑k / Nesterov_second.γ f h f' x0 ↑k) • h)
(Nesterov_second.y f h f' x0 (↑k - 1) - (Nesterov_second.t f h f' x0 ↑k / Nesterov_second.γ f h f' x0 ↑k) • f' (Nesterov_second.z f h f' x0 k))
(Nesterov_second.y f h f' x0 ↑k)
theorem
Nesterov_second.update3
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second f h f' x0]
(k : ℕ+)
:
Nesterov_second.x f h f' x0 ↑k = (1 - Nesterov_second.γ f h f' x0 ↑k) • Nesterov_second.x f h f' x0 (↑k - 1) + Nesterov_second.γ f h f' x0 ↑k • Nesterov_second.y f h f' x0 ↑k
theorem
Nesterov_second_convergence
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{x0 : E}
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{alg : Nesterov_second f h f' x0}
{xm : E}
(minφ : IsMinOn (f + h) Set.univ xm)
(k : ℕ)
:
f (Nesterov_second.x f h f' x0 (k + 1)) + h (Nesterov_second.x f h f' x0 (k + 1)) - f xm - h xm ≤ Nesterov_second.γ f h f' x0 (k + 1) ^ 2 / (2 * Nesterov_second.t f h f' x0 (k + 1)) * ‖x0 - xm‖ ^ 2
class
Nesterov_second_fix_stepsize
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(h : E → ℝ)
(f' : E → E)
(x0 : E)
:
Type u_1
- l : NNReal
- hl : ↑(Nesterov_second_fix_stepsize.l f h f' x0) > 0
- x : ℕ → E
- y : ℕ → E
- z : ℕ+ → E
- h₁ : ∀ (x : E), HasGradientAt f (f' x) x
- h₂ : LipschitzWith (Nesterov_second_fix_stepsize.l f h f' x0) f'
- oriy : Nesterov_second_fix_stepsize.y f h f' x0 0 = Nesterov_second_fix_stepsize.x f h f' x0 0
- oriγ : Nesterov_second_fix_stepsize.γ f h f' x0 1 = 1
- initial : x0 = Nesterov_second_fix_stepsize.x f h f' x0 0
- teq : ∀ (n : ℕ), Nesterov_second_fix_stepsize.t f h f' x0 n = 1 / ↑(Nesterov_second_fix_stepsize.l f h f' x0)
- update1 : ∀ (k : ℕ+), Nesterov_second_fix_stepsize.z f h f' x0 k = (1 - Nesterov_second_fix_stepsize.γ f h f' x0 ↑k) • Nesterov_second_fix_stepsize.x f h f' x0 (↑k - 1) + Nesterov_second_fix_stepsize.γ f h f' x0 ↑k • Nesterov_second_fix_stepsize.y f h f' x0 (↑k - 1)
- update2 : ∀ (k : ℕ+), prox_prop ((Nesterov_second_fix_stepsize.t f h f' x0 ↑k / Nesterov_second_fix_stepsize.γ f h f' x0 ↑k) • h) (Nesterov_second_fix_stepsize.y f h f' x0 (↑k - 1) - (Nesterov_second_fix_stepsize.t f h f' x0 ↑k / Nesterov_second_fix_stepsize.γ f h f' x0 ↑k) • f' (Nesterov_second_fix_stepsize.z f h f' x0 k)) (Nesterov_second_fix_stepsize.y f h f' x0 ↑k)
- update3 : ∀ (k : ℕ+), Nesterov_second_fix_stepsize.x f h f' x0 ↑k = (1 - Nesterov_second_fix_stepsize.γ f h f' x0 ↑k) • Nesterov_second_fix_stepsize.x f h f' x0 (↑k - 1) + Nesterov_second_fix_stepsize.γ f h f' x0 ↑k • Nesterov_second_fix_stepsize.y f h f' x0 ↑k
Instances
theorem
Nesterov_second_fix_stepsize.hl
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
:
↑(Nesterov_second_fix_stepsize.l f h f' x0) > 0
theorem
Nesterov_second_fix_stepsize.h₁
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
{f' : E → E}
(x0 : E)
[self : Nesterov_second_fix_stepsize f h f' x0]
(x : E)
:
HasGradientAt f (f' x) x
theorem
Nesterov_second_fix_stepsize.convf
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
(f' : E → E)
(x0 : E)
[self : Nesterov_second_fix_stepsize f h f' x0]
:
theorem
Nesterov_second_fix_stepsize.h₂
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
:
LipschitzWith (Nesterov_second_fix_stepsize.l f h f' x0) f'
theorem
Nesterov_second_fix_stepsize.convh
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
{h : E → ℝ}
(f' : E → E)
(x0 : E)
[self : Nesterov_second_fix_stepsize f h f' x0]
:
theorem
Nesterov_second_fix_stepsize.oriy
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
:
Nesterov_second_fix_stepsize.y f h f' x0 0 = Nesterov_second_fix_stepsize.x f h f' x0 0
theorem
Nesterov_second_fix_stepsize.oriγ
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
:
Nesterov_second_fix_stepsize.γ f h f' x0 1 = 1
theorem
Nesterov_second_fix_stepsize.initial
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
:
x0 = Nesterov_second_fix_stepsize.x f h f' x0 0
theorem
Nesterov_second_fix_stepsize.teq
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
(n : ℕ)
:
Nesterov_second_fix_stepsize.t f h f' x0 n = 1 / ↑(Nesterov_second_fix_stepsize.l f h f' x0)
theorem
Nesterov_second_fix_stepsize.γeq
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
(n : ℕ)
:
Nesterov_second_fix_stepsize.γ f h f' x0 n = if n = 0 then 1 / 2 else 2 / (1 + ↑n)
theorem
Nesterov_second_fix_stepsize.update1
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
(k : ℕ+)
:
Nesterov_second_fix_stepsize.z f h f' x0 k = (1 - Nesterov_second_fix_stepsize.γ f h f' x0 ↑k) • Nesterov_second_fix_stepsize.x f h f' x0 (↑k - 1) + Nesterov_second_fix_stepsize.γ f h f' x0 ↑k • Nesterov_second_fix_stepsize.y f h f' x0 (↑k - 1)
theorem
Nesterov_second_fix_stepsize.update2
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
(k : ℕ+)
:
prox_prop ((Nesterov_second_fix_stepsize.t f h f' x0 ↑k / Nesterov_second_fix_stepsize.γ f h f' x0 ↑k) • h)
(Nesterov_second_fix_stepsize.y f h f' x0 (↑k - 1) - (Nesterov_second_fix_stepsize.t f h f' x0 ↑k / Nesterov_second_fix_stepsize.γ f h f' x0 ↑k) • f' (Nesterov_second_fix_stepsize.z f h f' x0 k))
(Nesterov_second_fix_stepsize.y f h f' x0 ↑k)
theorem
Nesterov_second_fix_stepsize.update3
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[self : Nesterov_second_fix_stepsize f h f' x0]
(k : ℕ+)
:
Nesterov_second_fix_stepsize.x f h f' x0 ↑k = (1 - Nesterov_second_fix_stepsize.γ f h f' x0 ↑k) • Nesterov_second_fix_stepsize.x f h f' x0 (↑k - 1) + Nesterov_second_fix_stepsize.γ f h f' x0 ↑k • Nesterov_second_fix_stepsize.y f h f' x0 ↑k
instance
instNesterov_secondOfNesterov_second_fix_stepsize
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
[p : Nesterov_second_fix_stepsize f h f' x0]
:
Nesterov_second f h f' x0
Equations
- One or more equations did not get rendered due to their size.
theorem
Nesterov_second_fix_stepsize_converge
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x0 : E}
{alg : Nesterov_second_fix_stepsize f h f' x0}
{xm : E}
(minφ : IsMinOn (f + h) Set.univ xm)
(k : ℕ)
: