import numpy as np

n = 10000
p = 5
theta_true = np.array([-2, -1, 0, 1, 2])


X = np.random.randn(n, p)
Y = X @ theta_true + np.random.randn(n)


def sgd_linear(X, Y, c=1.0, alpha=0.75):
    n, p = X.shape
    theta = np.zeros((n + 1, p))

    for i in range(n):
        grad = -(Y[i] - X[i] @ theta[i]) * X[i]
        theta[i + 1] = theta[i] - c * (i + 1)**(-alpha) * grad

    return theta

theta_path = sgd_linear(X, Y)


import matplotlib.pyplot as plt

theta_true_mat = np.tile(theta_true, (theta_path.shape[0], 1))

err = np.sum((theta_path - theta_true_mat)**2, axis=1)

# Axe des itérations
i = np.arange(1, len(err) + 1)

# Plot
plt.figure()
plt.plot(i, err, linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Erreur quadratique")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()


B = 50
n, p = X.shape

# Matrice des erreurs : (n+1) x B
err_mat = np.zeros((n + 1, B))

theta_true_mat = np.tile(theta_true, (n + 1, 1))

for b in range(B):
    Xb = np.random.randn(n, p)
    Yb = Xb @ theta_true + np.random.randn(n)

    th = sgd_linear(Xb, Yb)

    err_mat[:, b] = np.sum((th - theta_true_mat)**2, axis=1)

# Moyenne sur les répétitions
err_mean = err_mat.mean(axis=1)

i = np.arange(1, n + 2)

# Plot
plt.figure()
plt.plot(i, err_mean, linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Erreur quadratique moyenne")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()


B = 50
n, p = X.shape

# Matrices d'erreur : (n+1) x B
err_mat_c03  = np.zeros((n + 1, B))
err_mat_c01  = np.zeros((n + 1, B))
err_mat_c001 = np.zeros((n + 1, B))

theta_true_mat = np.tile(theta_true, (n + 1, 1))

# Matrice de mise à l'échelle diag(1:5)
D = np.diag(np.arange(1, p + 1))

for b in range(B):
    Xb = np.random.randn(n, p) @ D
    Yb = Xb @ theta_true + np.random.randn(n)

    th_c03  = sgd_linear(Xb, Yb, c=0.3)
    th_c01  = sgd_linear(Xb, Yb, c=0.1)
    th_c001 = sgd_linear(Xb, Yb, c=0.01)

    err_mat_c03[:, b]  = np.sum((th_c03  - theta_true_mat)**2, axis=1)
    err_mat_c01[:, b]  = np.sum((th_c01  - theta_true_mat)**2, axis=1)
    err_mat_c001[:, b] = np.sum((th_c001 - theta_true_mat)**2, axis=1)

# Moyennes Monte Carlo
err_mean_c03  = err_mat_c03.mean(axis=1)
err_mean_c01  = err_mat_c01.mean(axis=1)
err_mean_c001 = err_mat_c001.mean(axis=1)

i = np.arange(1, n + 2)

# Plot
plt.figure(figsize=(8, 5))
plt.plot(i, err_mean_c03,  label="c = 0.3", linewidth=1)
plt.plot(i, err_mean_c01,  label="c = 0.1", linewidth=1)
plt.plot(i, err_mean_c001, label="c = 0.01", linewidth=1)

plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Erreur quadratique moyenne")
plt.legend()
plt.grid(True, which="both", linestyle="--", alpha=0.4)
plt.tight_layout()
plt.show()


def sigmoid(t):
    return 1.0 / (1.0 + np.exp(-t))

X = np.random.randn(n, p)
prob = sigmoid(X @ theta_true)
Y = np.random.binomial(1, prob)

def sgd_logistic(X, Y, c=1.0, alpha=0.75):
    n, p = X.shape
    theta = np.zeros((n + 1, p))

    for i in range(n):
        p_i = sigmoid(X[i] @ theta[i])
        grad = (p_i - Y[i]) * X[i]
        theta[i + 1] = theta[i] - c * (i + 1)**(-alpha) * grad

    return theta

theta_path = sgd_logistic(X, Y)

theta_true_mat = np.tile(theta_true, (n + 1, 1))
err = np.sum((theta_path - theta_true_mat)**2, axis=1)

i = np.arange(1, len(err) + 1)

plt.figure()
plt.plot(i, err, linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Quadratic error")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()

TP 2 - Algorithmes de gradient stochastiques¶

Objectifs du TP¶

1. Régression linéaire¶

2. Régression logistique¶