import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

np.random.seed(123)
n = 10000
p = 5
theta_true = np.array([-2, -1, 0, 1, 2])


X = np.random.randn(n, p)
Y = X @ theta_true + np.random.randn(n)


def newton_linear(X, Y):

    n, p = X.shape
    
    theta = np.zeros((n+1, p))
    H = np.eye(p)  # H^{-1}_0
    
    for i in range(n):
        # Gradient stochastique
        grad = -(Y[i] - X[i] @ theta[i]) * X[i]
        
        # Mise à jour de theta
        step = (i+1) / (i+100)
        theta[i+1] = theta[i] - step * H @ grad
        
        # Mise à jour de H^{-1} (formule de Sherman-Morrison)
        u = H @ X[i]
        denom = 1 + X[i] @ u
        H = H - np.outer(u, u) / denom
        
    return theta


def asgd_linear(X, Y, c=1,alpha=0.75):
    n, p = X.shape
    theta = np.zeros(p)
    thetabar = np.zeros(p)

    theta_path = np.zeros((n+1, p))
    thetabar_path = np.zeros((n+1, p))

    for i in range(n):
        grad = (X[i] @ theta - Y[i]) * X[i]
        theta = theta - c*(i+1)**(-alpha) * grad
        thetabar = thetabar + (theta- thetabar) / (i + 1)

        theta_path[i+1] = theta
        thetabar_path[i+1] = thetabar

    return theta_path, thetabar_path


theta_new = newton_linear(X,Y)
theta_path, thetabar_path = asgd_linear(X, Y)

errnew = np.sum((theta_new - theta_true)**2, axis=1)
err = np.sum((theta_path - theta_true)**2, axis=1)
errbar = np.sum((thetabar_path - theta_true)**2, axis=1)

plt.figure()
plt.plot(err, label="SGD")
plt.plot(errbar, label="ASGD")
plt.plot(errnew, label="Newton")
plt.xscale("log")
plt.yscale("log")
plt.xlabel("Iteration")
plt.ylabel("Squared error")
plt.legend()
plt.show()


B = 50
n, p = X.shape

# Matrice des erreurs : (n+1) x B
err_mat = np.zeros((n + 1, B))
err_matbar = np.zeros((n + 1, B))
err_new = np.zeros((n + 1, B))

theta_true_mat = np.tile(theta_true, (n + 1, 1))

for b in range(B):
    Xb = np.random.randn(n, p)
    Yb = Xb @ theta_true + np.random.randn(n)

    th,thbar = asgd_linear(Xb, Yb)
    thnew = newton_linear(Xb,Yb)
    err_mat[:, b] = np.sum((th - theta_true_mat)**2, axis=1)
    err_new[:, b] = np.sum((thnew - theta_true_mat)**2, axis=1)
    err_matbar[:, b] = np.sum((thbar - theta_true_mat)**2, axis=1)

# Moyenne sur les répétitions
err_mean = err_mat.mean(axis=1)
err_meannew = err_new.mean(axis=1)
err_meanbar = err_matbar.mean(axis=1)

i = np.arange(1, n + 2)

# Plot
plt.figure()
plt.plot(i,err_mean, label="SGD",linewidth=1)
plt.plot(i,err_meanbar, label="ASGD",linewidth=1)
plt.plot(i,err_meannew, label="Newton",linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Erreur quadratique moyenne")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()


D = np.sqrt(np.diag([1e-2, 1e-1, 1, 10, 1e2]))
X = np.random.randn(n, p) @ D
Y = X @ theta_true + np.random.randn(n)

B = 50
n, p = X.shape

# Matrice des erreurs : (n+1) x B
err_matnew = np.zeros((n + 1, B))
err_mat = np.zeros((n + 1, B))
err_matbar = np.zeros((n + 1, B))

theta_true_mat = np.tile(theta_true, (n + 1, 1))

for b in range(B):
    Xb = np.random.randn(n, p)@D
    Yb = Xb @ theta_true + np.random.randn(n)

    th,thbar = asgd_linear(Xb, Yb,c=0.01)
    thnew=newton_linear(Xb,Yb)
    err_mat[:, b] = np.sum((th - theta_true_mat)**2, axis=1)
    err_matnew[:, b] = np.sum((thnew - theta_true_mat)**2, axis=1)
    err_matbar[:, b] = np.sum((thbar - theta_true_mat)**2, axis=1)

# Moyenne sur les répétitions
err_mean = err_mat.mean(axis=1)
err_meanbar = err_matbar.mean(axis=1)
err_meannew = err_matnew.mean(axis=1)

i = np.arange(1, n + 2)

# Plot
plt.figure()
plt.plot(i,err_mean, label="SGD",linewidth=1)
plt.plot(i,err_meanbar, label="ASGD",linewidth=1)
plt.plot(i,err_meannew, label="Newton",linewidth=1)
plt.xscale("log")
plt.yscale("log")
plt.xlabel("n")
plt.ylabel("Erreur quadratique moyenne")
plt.grid(True, which="both", linestyle="--", alpha=0.1)
plt.show()

TP4 – Algorithmes de Newton stochastiques¶

Objectifs du TP¶

1. Régression linéaire¶

3. Probème mal conditioné¶