优化算法学习
参考 动手学深度学习-11.优化算法。AI辅助创作，部分内容因个人时间精力进行了取舍。

## 梯度下降

利用泰勒展开，\(f(x+\epsilon) = f(x) + \epsilon f'(x) + O(\epsilon^2)\)。由于我们的目的是求得另一个\(x_1\)，使得\(f(x_1) \lt f(x)\)，其中\(x_1 = x + \epsilon\)。换言之，就是让\(\epsilon f'(x) < 0\)。

固定步长为\(\eta > 0\)，取\(\epsilon = -\eta f'(x)\)，代入可得\(\epsilon f'(x) = -\eta f'(x)^2 < 0\)。

```python
%matplotlib inline
import numpy as np
import torch
from d2l import torch as d2l
```

定义目标函数为\(f(x) = x^2\)，目标函数导数为\(f'(x) = 2x\)。

```python
def f(x):
    return x ** 2

def f_grad(x):
    return 2 * x
```

使用\(x=10\)作为初始值，设\(\eta=0.2\)，使用梯度下降法迭代\(10\)次。

```python
import matplotlib.pyplot as plt

def gd(eta, f_grad):
    x = 10
    results = [x]
    for i in range(10):
        x -= eta * f_grad(x)
        results.append(float(x))
    print(f'epoch 10,x:{x:f}')
    return results

_trace_history = []

def show_trace(results, f, title=None, max_cols=3, reset=False, show=True):
    global _trace_history
    if reset:
        _trace_history = []
    _trace_history.append((results, f, title))
    if not show:
        return
    n = len(_trace_history)
    cols = min(max_cols, n)
    traces = _trace_history[-cols:]
    fig, axes = plt.subplots(1, cols, figsize=(4.8 * cols, 3.6))
    if cols == 1:
        axes = [axes]
    for i, (ax, (res, func, t)) in enumerate(zip(axes, traces), start=n - cols + 1):
        bound = max(abs(min(res)), abs(max(res)))
        f_line = torch.arange(-bound, bound, 0.01)
        x_line = f_line.numpy()
        y_line = [float(func(x)) for x in f_line]
        x_res = [float(x) for x in res]
        y_res = [float(func(x)) for x in res]
        ax.plot(x_line, y_line, '-')
        ax.plot(x_res, y_res, '-o')
        ax.set_xlabel('x')
        ax.set_ylabel('f(x)')
        ax.set_title(t if t is not None else f'第{i}次轨迹')
        ax.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

results = gd(0.2, f_grad)
show_trace(results, f, title='eta=0.2', reset=True)
```

输出：

```
epoch 10,x:7.295479
```

...