import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

np.random.seed(123)
n = 10000
p = 5
theta_true = np.array([-2, -1, 0, 1, 2])


X = np.random.randn(n, p)
Y = X @ theta_true + np.random.randn(n)


def asgd_linear(X, Y, c=1,alpha=0.75):
    n, p = X.shape
    theta = np.zeros(p)
    thetabar = np.zeros(p)

    theta_path = np.zeros((n+1, p))
    thetabar_path = np.zeros((n+1, p))

    for i in range(n):
        grad = (X[i] @ theta - Y[i]) * X[i]
        theta = theta - c*(i+1)**(-alpha) * grad
        thetabar = thetabar + (theta- thetabar) / (i + 1)

        theta_path[i+1] = theta
        thetabar_path[i+1] = thetabar

    return theta_path, thetabar_path


theta_path, thetabar_path = asgd_linear(X, Y)

err = np.sum((theta_path - theta_true)**2, axis=1)
errbar = np.sum((thetabar_path - theta_true)**2, axis=1)

plt.figure()
plt.plot(err, label="SGD")
plt.plot(errbar, label="ASGD")
plt.xscale("log")
plt.yscale("log")
plt.xlabel("Iteration")
plt.ylabel("Squared error")
plt.legend()
plt.show()


B = 50
n, p = X.shape

# Matrice des erreurs : (n+1) x B
err_mat = np.zeros((n + 1, B))
err_matbar = np.zeros((n + 1, B))

theta_true_mat = np.tile(theta_true, (n + 1, 1))

for b in range(B):
    Xb = np.random.randn(n, p)
    Yb = Xb @ theta_true + np.random.randn(n)

    th,thbar = asgd_linear(Xb, Yb)

    err_mat[:, b] = np.sum((th - theta_true_mat)**2, axis=1)
    err_matbar[:, b] = np.sum((thbar - theta_true_mat)**2, axis=1)

# Moyenne sur les répétitions
err_mean = err_mat.mean(axis=1)
err_meanbar = err_matbar.mean(axis=1)

i = np.arange(1, n + 2)

# Plot
plt.figure()
plt.plot(i,err_mean, label="SGD",linewidth=1)
plt.plot(i,err_meanbar, label="ASGD",linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Erreur quadratique moyenne")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()


theta_true=np.ones(5)
def sigmoid(t):
    return 1.0 / (1.0 + np.exp(-t))
X = np.random.randn(n, p)
prob = sigmoid(X @ theta_true)
Y = np.random.binomial(1, prob)
def asgd_logistic(X, Y, c=1.0, alpha=0.75):
    n, p = X.shape
    theta = np.zeros((n + 1, p))
    thetabar = np.zeros((n + 1, p))

    for i in range(n):
        p_i = sigmoid(X[i] @ theta[i])
        grad = (p_i - Y[i]) * X[i]
        theta[i + 1] = theta[i] - c * (i + 1)**(-alpha) * grad
        thetabar[i+1] = thetabar[i] + (theta[i+1]- thetabar[i]) / (i + 1)

    return theta, thetabar

theta_path , thetabar_path = asgd_logistic(X, Y)

theta_true_mat = np.tile(theta_true, (n + 1, 1))
err = np.sum((theta_path - theta_true_mat)**2, axis=1)
errbar = np.sum((thetabar_path - theta_true_mat)**2, axis=1)

i = np.arange(1, len(err) + 1)

plt.figure()
plt.plot(i, err, linewidth=1)
plt.plot(i, errbar, linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Quadratic error")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()


theta_path , thetabar_path = asgd_logistic(X, Y,c=2,alpha=0.66)

theta_true_mat = np.tile(theta_true, (n + 1, 1))
err = np.sum((theta_path - theta_true_mat)**2, axis=1)
errbar = np.sum((thetabar_path - theta_true_mat)**2, axis=1)

i = np.arange(1, len(err) + 1)

plt.figure()
plt.plot(i, err, linewidth=1)
plt.plot(i, errbar, linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Quadratic error")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()


D = np.sqrt(np.diag([1e-2, 1e-1, 1, 10, 1e2]))
X = np.random.randn(n, p) @ D
Y = X @ theta_true + np.random.randn(n)

B = 50
n, p = X.shape

# Matrice des erreurs : (n+1) x B
err_mat = np.zeros((n + 1, B))
err_matbar = np.zeros((n + 1, B))

theta_true_mat = np.tile(theta_true, (n + 1, 1))

for b in range(B):
    Xb = np.random.randn(n, p)@D
    Yb = Xb @ theta_true + np.random.randn(n)

    th,thbar = asgd_linear(Xb, Yb,c=0.01)

    err_mat[:, b] = np.sum((th - theta_true_mat)**2, axis=1)
    err_matbar[:, b] = np.sum((thbar - theta_true_mat)**2, axis=1)

# Moyenne sur les répétitions
err_mean = err_mat.mean(axis=1)
err_meanbar = err_matbar.mean(axis=1)

i = np.arange(1, n + 2)

# Plot
plt.figure()
plt.plot(i,err_mean, label="SGD",linewidth=1)
plt.plot(i,err_meanbar, label="ASGD",linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Erreur quadratique moyenne")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()

TP3 – Algorithmes de gradient stochastiques moyennés¶

Objectifs du TP¶

1. Régression linéaire¶

2. Régression logistique¶

3. Probème mal conditioné¶