import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Reproducibility and problem dimensions for the linear-model experiment.
np.random.seed(123)
n = 10000  # number of observations per simulated dataset
p = 5      # number of covariates
# True coefficient vector: (-2, -1, 0, 1, 2).
theta_true = np.arange(-2, 3)
On considère le modèle linéaire
$$ Y_i = X_i^T \theta + \varepsilon_i, $$
avec $X_i \sim \mathcal{N}(0, I_p)$ et $\varepsilon_i \sim \mathcal{N}(0, 1)$ i.i.d.
On prend $n = 10000$.
# Simulate the dataset: standard-Gaussian design, then Gaussian noise
# (draw order matters for reproducibility: design first, noise second).
X = np.random.randn(n, p)
noise = np.random.randn(n)
Y = X @ theta_true + noise
def asgd_linear(X, Y, c=1, alpha=0.75):
    """Single-pass SGD for the linear model, with Polyak-Ruppert averaging.

    Performs one stochastic-gradient pass over the data for the squared
    loss, using step sizes gamma_i = c * i**(-alpha), and tracks both the
    raw iterate and its running average.

    Parameters
    ----------
    X : ndarray, shape (n, p)
        Design matrix, one observation per row.
    Y : ndarray, shape (n,)
        Responses.
    c, alpha : float
        Step-size constants, gamma_i = c * i**(-alpha).

    Returns
    -------
    theta_path : ndarray, shape (n + 1, p)
        theta_path[i] is the SGD iterate after i observations
        (row 0 is the all-zero starting point).
    thetabar_path : ndarray, shape (n + 1, p)
        Running averages of the iterates theta_1, ..., theta_i.
    """
    n_obs, dim = X.shape
    theta_path = np.zeros((n_obs + 1, dim))
    thetabar_path = np.zeros((n_obs + 1, dim))
    current = np.zeros(dim)
    running_mean = np.zeros(dim)
    for step, (x_row, y_val) in enumerate(zip(X, Y), start=1):
        # Stochastic gradient of the squared loss at the current iterate.
        gradient = (x_row @ current - y_val) * x_row
        current = current - c * step ** (-alpha) * gradient
        # Incremental mean of theta_1, ..., theta_step.
        running_mean = running_mean + (current - running_mean) / step
        theta_path[step] = current
        thetabar_path[step] = running_mean
    return theta_path, thetabar_path
# Single-run comparison: squared estimation error of the SGD iterate versus
# the averaged (Polyak-Ruppert) iterate, on log-log axes.
theta_path, thetabar_path = asgd_linear(X, Y)
err = np.square(theta_path - theta_true).sum(axis=1)
errbar = np.square(thetabar_path - theta_true).sum(axis=1)

fig = plt.figure()
ax = fig.gca()
ax.plot(err, label="SGD")
ax.plot(errbar, label="ASGD")
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Iteration")
ax.set_ylabel("Squared error")
ax.legend()
plt.show()
# Monte-Carlo estimate of the mean squared error: draw B independent
# datasets, run SGD/ASGD on each, and average the error curves.
B = 50
n, p = X.shape
# (n + 1) x B error matrices: one column per replication.
err_mat = np.zeros((n + 1, B))
err_matbar = np.zeros((n + 1, B))
theta_true_mat = np.tile(theta_true, (n + 1, 1))
for b in range(B):
    # Fresh dataset for this replication.
    Xb = np.random.randn(n, p)
    Yb = Xb @ theta_true + np.random.randn(n)
    th, thbar = asgd_linear(Xb, Yb)
    err_mat[:, b] = np.square(th - theta_true_mat).sum(axis=1)
    err_matbar[:, b] = np.square(thbar - theta_true_mat).sum(axis=1)
# Average the squared errors over the B replications.
err_mean = err_mat.mean(axis=1)
err_meanbar = err_matbar.mean(axis=1)
i = 1 + np.arange(n + 1)

fig, ax = plt.subplots()
ax.plot(i, err_mean, label="SGD", linewidth=1)
ax.plot(i, err_meanbar, label="ASGD", linewidth=1)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("n")
ax.set_ylabel("Erreur quadratique moyenne")
ax.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()
On prend ici $\gamma_{n} = c_{\gamma}\, n^{-\alpha}$ avec $c_{\gamma} = 1$ et $\alpha = 0.75$.
theta_true=np.ones(5)
def sigmoid(t):
    """Numerically stable logistic function 1 / (1 + exp(-t)).

    The naive form ``1 / (1 + np.exp(-t))`` overflows in ``exp`` for large
    negative ``t`` (NumPy emits a RuntimeWarning and produces ``inf``).
    Writing both branches in terms of ``exp(-|t|)`` keeps every exponent
    non-positive, so ``exp`` never overflows, while returning exactly the
    same values.

    Parameters
    ----------
    t : scalar or ndarray
        Argument(s) of the logistic function.

    Returns
    -------
    Scalar (0-d array) or ndarray with values in (0, 1), same shape as ``t``.
    """
    e = np.exp(-np.abs(t))
    # t >= 0:  1 / (1 + exp(-t));   t < 0:  exp(t) / (1 + exp(t)).
    return np.where(t >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
# Logistic-model data: Gaussian covariates, Bernoulli responses with
# success probability sigmoid(X_i^T theta_true).
X = np.random.randn(n, p)
linear_pred = X @ theta_true
prob = sigmoid(linear_pred)
Y = np.random.binomial(1, prob)
def asgd_logistic(X, Y, c=1.0, alpha=0.75):
    """Single-pass SGD for logistic regression, with Polyak-Ruppert averaging.

    One stochastic-gradient pass over the data for the logistic log-loss,
    with step sizes gamma_i = c * i**(-alpha), storing every iterate and
    every running average.

    Parameters
    ----------
    X : ndarray, shape (n, p)
        Design matrix, one observation per row.
    Y : ndarray, shape (n,)
        Binary responses in {0, 1}.
    c, alpha : float
        Step-size constants, gamma_i = c * i**(-alpha).

    Returns
    -------
    theta : ndarray, shape (n + 1, p)
        theta[i] is the SGD iterate after i observations (row 0 is zero).
    thetabar : ndarray, shape (n + 1, p)
        Running averages of the iterates.
    """
    n_obs, dim = X.shape
    theta = np.zeros((n_obs + 1, dim))
    thetabar = np.zeros((n_obs + 1, dim))
    for step in range(1, n_obs + 1):
        x_row = X[step - 1]
        # Predicted probability under the current iterate (logistic link).
        p_hat = 1.0 / (1.0 + np.exp(-(x_row @ theta[step - 1])))
        grad = (p_hat - Y[step - 1]) * x_row
        theta[step] = theta[step - 1] - c * step ** (-alpha) * grad
        # Incremental mean of theta_1, ..., theta_step.
        thetabar[step] = thetabar[step - 1] + (theta[step] - thetabar[step - 1]) / step
    return theta, thetabar
# Logistic regression, default step sizes: squared estimation error of the
# SGD iterate and of its running average along one pass, log-log scale.
theta_path, thetabar_path = asgd_logistic(X, Y)
theta_true_mat = np.tile(theta_true, (n + 1, 1))
err = np.square(theta_path - theta_true_mat).sum(axis=1)
errbar = np.square(thetabar_path - theta_true_mat).sum(axis=1)
i = 1 + np.arange(len(err))

plt.figure()
plt.plot(i, err, linewidth=1)
plt.plot(i, errbar, linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Quadratic error")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()
# Same experiment with larger, more slowly decaying steps (c = 2, alpha = 0.66).
theta_path, thetabar_path = asgd_logistic(X, Y, c=2, alpha=0.66)
theta_true_mat = np.tile(theta_true, (n + 1, 1))
err = np.square(theta_path - theta_true_mat).sum(axis=1)
errbar = np.square(thetabar_path - theta_true_mat).sum(axis=1)
i = 1 + np.arange(len(err))

plt.figure()
plt.plot(i, err, linewidth=1)
plt.plot(i, errbar, linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Quadratic error")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()
Revenir à l'exemple de la régression linéaire mais en prenant $X \sim \mathcal{N}(0, D)$ avec $D = \text{diag}\left( 10^{-2} , 10^{-1} , 1 , 10, 10^{2} \right)$. Regarder les évolutions des erreurs quadratiques moyennes pour les estimateurs de gradient stochastique et leurs versions moyennées pour différentes valeurs de $c_{\gamma}$.
# X ~ N(0, Cov) with Cov = diag(1e-2, 1e-1, 1, 10, 1e2): scale standard
# normals column-wise by the square roots of the variances.
# NOTE: the variable D stores sqrt(Cov), not Cov itself.
D = np.diag(np.sqrt([1e-2, 1e-1, 1.0, 10.0, 1e2]))
X = np.random.randn(n, p) @ D
Y = X @ theta_true + np.random.randn(n)
# Monte-Carlo experiment with the ill-conditioned design (columns scaled by
# D): B replications, mean squared errors of SGD/ASGD with c_gamma = 0.01.
B = 50
n, p = X.shape
# (n + 1) x B error matrices: one column per replication.
err_mat = np.zeros((n + 1, B))
err_matbar = np.zeros((n + 1, B))
theta_true_mat = np.tile(theta_true, (n + 1, 1))
for b in range(B):
    # Fresh scaled dataset for this replication.
    Xb = np.random.randn(n, p) @ D
    Yb = Xb @ theta_true + np.random.randn(n)
    th, thbar = asgd_linear(Xb, Yb, c=0.01)
    err_mat[:, b] = np.square(th - theta_true_mat).sum(axis=1)
    err_matbar[:, b] = np.square(thbar - theta_true_mat).sum(axis=1)
# Average the squared errors over the B replications.
err_mean = err_mat.mean(axis=1)
err_meanbar = err_matbar.mean(axis=1)
i = 1 + np.arange(n + 1)

fig, ax = plt.subplots()
ax.plot(i, err_mean, label="SGD", linewidth=1)
ax.plot(i, err_meanbar, label="ASGD", linewidth=1)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("n")
ax.set_ylabel("Erreur quadratique moyenne")
ax.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()