ProximalGradient #
Main results #
This file mainly concentrates on the proximal gradient algorithm for composite optimization problems.
We prove the O(1 / k) rate for this algorithm.
class
proximal_gradient_method
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
(h : E → ℝ)
(f' : E → E)
(x₀ : E)
:
Type u_1
- xm : E
- t : ℝ
- x : ℕ → E
- L : NNReal
- h₁ : ∀ (x₁ : E), HasGradientAt f (f' x₁) x₁
- h₂ : LipschitzWith (proximal_gradient_method.L f h f' x₀) f'
- h₃ : ContinuousOn h Set.univ
- minphi : IsMinOn (f + h) Set.univ (proximal_gradient_method.xm f h f' x₀)
- tpos : 0 < proximal_gradient_method.t f h f' x₀
- step : proximal_gradient_method.t f h f' x₀ ≤ 1 / ↑(proximal_gradient_method.L f h f' x₀)
- ori : proximal_gradient_method.x f h f' x₀ 0 = x₀
- hL : ↑(proximal_gradient_method.L f h f' x₀) > 0
- update : ∀ (k : ℕ), prox_prop (proximal_gradient_method.t f h f' x₀ • h) (proximal_gradient_method.x f h f' x₀ k - proximal_gradient_method.t f h f' x₀ • f' (proximal_gradient_method.x f h f' x₀ k)) (proximal_gradient_method.x f h f' x₀ (k + 1))
Instances
theorem
proximal_gradient_method.fconv
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
(f' : E → E)
(x₀ : E)
[self : proximal_gradient_method f h f' x₀]
:
theorem
proximal_gradient_method.hconv
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
{h : E → ℝ}
(f' : E → E)
(x₀ : E)
[self : proximal_gradient_method f h f' x₀]
:
theorem
proximal_gradient_method.h₁
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
(h : E → ℝ)
{f' : E → E}
(x₀ : E)
[self : proximal_gradient_method f h f' x₀]
(x₁ : E)
:
HasGradientAt f (f' x₁) x₁
theorem
proximal_gradient_method.h₂
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x₀ : E}
[self : proximal_gradient_method f h f' x₀]
:
LipschitzWith (proximal_gradient_method.L f h f' x₀) f'
theorem
proximal_gradient_method.h₃
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
(f : E → ℝ)
{h : E → ℝ}
(f' : E → E)
(x₀ : E)
[self : proximal_gradient_method f h f' x₀]
:
ContinuousOn h Set.univ
theorem
proximal_gradient_method.minphi
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x₀ : E}
[self : proximal_gradient_method f h f' x₀]
:
IsMinOn (f + h) Set.univ (proximal_gradient_method.xm f h f' x₀)
theorem
proximal_gradient_method.tpos
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x₀ : E}
[self : proximal_gradient_method f h f' x₀]
:
0 < proximal_gradient_method.t f h f' x₀
theorem
proximal_gradient_method.step
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x₀ : E}
[self : proximal_gradient_method f h f' x₀]
:
proximal_gradient_method.t f h f' x₀ ≤ 1 / ↑(proximal_gradient_method.L f h f' x₀)
theorem
proximal_gradient_method.ori
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x₀ : E}
[self : proximal_gradient_method f h f' x₀]
:
proximal_gradient_method.x f h f' x₀ 0 = x₀
theorem
proximal_gradient_method.hL
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x₀ : E}
[self : proximal_gradient_method f h f' x₀]
:
↑(proximal_gradient_method.L f h f' x₀) > 0
theorem
proximal_gradient_method.update
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
{f : E → ℝ}
{h : E → ℝ}
{f' : E → E}
{x₀ : E}
[self : proximal_gradient_method f h f' x₀]
(k : ℕ)
:
prox_prop (proximal_gradient_method.t f h f' x₀ • h)
(proximal_gradient_method.x f h f' x₀ k - proximal_gradient_method.t f h f' x₀ • f' (proximal_gradient_method.x f h f' x₀ k))
(proximal_gradient_method.x f h f' x₀ (k + 1))
theorem
proximal_gradient_method_converge
{E : Type u_1}
[NormedAddCommGroup E]
[InnerProductSpace ℝ E]
[CompleteSpace E]
[ProperSpace E]
{x₀ : E}
{f : E → ℝ}
{f' : E → E}
{h : E → ℝ}
{alg : proximal_gradient_method f h f' x₀}
(k : ℕ+)
:
f (proximal_gradient_method.x f h f' x₀ ↑k) + h (proximal_gradient_method.x f h f' x₀ ↑k) - f (proximal_gradient_method.xm f h f' x₀) - h (proximal_gradient_method.xm f h f' x₀) ≤ 1 / (2 * ↑↑k * proximal_gradient_method.t f h f' x₀) * ‖x₀ - proximal_gradient_method.xm f h f' x₀‖ ^ 2