library(ggplot2)
library(tidyr)
library(dplyr)
set.seed(123)
n <- 10000                         # sample size
p <- 5                             # number of covariates
theta_true <- c(-2, -1, 0, 1, 2)   # true regression parameter
The goal of this lab is to study the behavior of the averaged and stochastic Newton estimators in the linear regression setting.
We consider the model
\[ Y_i = X_i^T \theta + \varepsilon_i, \]
where the covariates \(X_i \in \mathbb{R}^p\) are i.i.d. standard Gaussian vectors, \(\theta \in \mathbb{R}^p\) is the unknown parameter, and the noise terms \(\varepsilon_i\) are i.i.d. standard Gaussian, independent of the \(X_i\). We take \(n = 10000\), \(p = 5\) and \(\theta = (-2, -1, 0, 1, 2)^T\).
X <- matrix(rnorm(n*p), n, p)      # design matrix with i.i.d. N(0,1) entries
Y <- X %*% theta_true + rnorm(n)   # responses under the linear model with N(0,1) noise
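The asgd_linear function below implements the stochastic gradient recursion with step sequence \(\gamma_i = c\, i^{-\alpha}\) (defaults \(c = 1\), \(\alpha = 0.75\)), together with the running average of the iterates, starting from \(\theta_0 = \bar{\theta}_0 = 0\). Written out, the two recursions it codes are
\[
\theta_{i+1} = \theta_{i} + c\,(i+1)^{-\alpha}\left( Y_{i+1} - X_{i+1}^{T} \theta_{i} \right) X_{i+1},
\qquad
\bar{\theta}_{i+1} = \bar{\theta}_{i} + \frac{1}{i+2}\left( \theta_{i+1} - \bar{\theta}_{i} \right),
\]
so that \(\bar{\theta}_{i}\) is the mean of \(\theta_{0}, \dots, \theta_{i}\).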
asgd_linear <- function(X, Y, c = 1, alpha = 0.75) {
  n <- nrow(X); p <- ncol(X)
  theta    <- matrix(0, n+1, p)   # SGD iterates: row i+1 stores theta_i
  thetabar <- matrix(0, n+1, p)   # running averages of the iterates
  for (i in 1:n) {
    # gradient of the quadratic loss at the current iterate
    grad <- -(Y[i] - sum(X[i,] * theta[i,])) * X[i,]
    # SGD step with learning rate c * i^(-alpha)
    theta[i+1,] <- theta[i,] - c * i^(-alpha) * grad
    # recursive update of the mean of theta_0, ..., theta_i
    thetabar[i+1,] <- thetabar[i,] + (theta[i+1,] - thetabar[i,]) / (i+1)
  }
  list(theta = theta, thetabar = thetabar)
}
theta_path <- asgd_linear(X,Y)
err <- rowSums((theta_path$theta - matrix(theta_true,n+1,p,byrow=TRUE))^2)
errbar <- rowSums((theta_path$thetabar - matrix(theta_true,n+1,p,byrow=TRUE))^2)
df <- data.frame(
i = seq_along(err),
err = err,
errbar=errbar
)
df_long <- df %>%
pivot_longer(
cols = c(err, errbar),
names_to = "method",
values_to = "error"
)
ggplot(df_long, aes(x = i, y = error, color = method)) +
geom_line(linewidth = 1) +
scale_y_log10() +
scale_x_log10() +
scale_color_manual(
values = c(
err = "blue",
errbar = "red"
),
labels = c(
err = "SGD",
errbar = "ASGD"
)
) +
labs(
x = "n",
y = "Erreur quadratique",
color = ""
) +
theme_minimal(base_size = 14)
# Monte Carlo study: average the errors over B independent datasets
B <- 50
err_mat <- matrix(0,n+1,B)
errbar_mat <- matrix(0,n+1,B)
for(b in 1:B){
X <- matrix(rnorm(n*p), n, p)
Y <- X %*% theta_true + rnorm(n)
th <- asgd_linear(X,Y)
err_mat[,b] <- rowSums((th$theta - matrix(theta_true,n+1,p,byrow=TRUE))^2)
errbar_mat[,b] <- rowSums((th$thetabar - matrix(theta_true,n+1,p,byrow=TRUE))^2)
}
df <- data.frame(
i = seq_along(rowMeans(err_mat)),
err = rowMeans(err_mat),
errbar= rowMeans(errbar_mat)
)
df_long <- df %>%
pivot_longer(
cols = c(err, errbar),
names_to = "method",
values_to = "error"
)
ggplot(df_long, aes(x = i, y = error, color = method)) +
geom_line(linewidth = 1) +
scale_y_log10() +
scale_x_log10() +
scale_color_manual(
values = c(
err = "blue",
errbar = "red"
),
labels = c(
err = "SGD",
errbar = "ASGD"
)
) +
labs(
x = "n",
y = "Erreur quadratique moyenne",
color = ""
) +
theme_minimal(base_size = 14)
# Step-size sensitivity: repeat the Monte Carlo study for several constants c on a rescaled design
B <- 50
err_mat_c1 <- matrix(0,n+1,B)
err_mat_c01 <- matrix(0,n+1,B)
err_mat_c001 <- matrix(0,n+1,B)
errbar_mat_c1 <- matrix(0,n+1,B)
errbar_mat_c01 <- matrix(0,n+1,B)
errbar_mat_c001 <- matrix(0,n+1,B)
for(b in 1:B){
X <- matrix(rnorm(n*p), n, p) %*% diag(1:5)   # rescaled design: column j has standard deviation j
Y <- X %*% theta_true + rnorm(n)
th    <- asgd_linear(X, Y, c = 0.3)    # largest constant considered (labelled "c = 0.3" below)
th001 <- asgd_linear(X, Y, c = 0.01)
th01  <- asgd_linear(X, Y, c = 0.1)
err_mat_c1[,b] <- rowSums((th$theta - matrix(theta_true,n+1,p,byrow=TRUE))^2)
err_mat_c01[,b] <- rowSums((th01$theta - matrix(theta_true,n+1,p,byrow=TRUE))^2)
err_mat_c001[,b] <- rowSums((th001$theta - matrix(theta_true,n+1,p,byrow=TRUE))^2)
errbar_mat_c1[,b] <- rowSums((th$thetabar - matrix(theta_true,n+1,p,byrow=TRUE))^2)
errbar_mat_c01[,b] <- rowSums((th01$thetabar - matrix(theta_true,n+1,p,byrow=TRUE))^2)
errbar_mat_c001[,b] <- rowSums((th001$thetabar - matrix(theta_true,n+1,p,byrow=TRUE))^2)
}
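With this rescaled design the Hessian of the quadratic risk is \(\mathbb{E}[X X^{T}] = \mathrm{diag}(1, 4, 9, 16, 25)\), so its eigenvalues spread over more than an order of magnitude; this is presumably why the constants compared here stop at \(c = 0.3\) rather than \(c = 1\). A small illustrative check (not part of the original script, it simply inspects the design of the last replication):
# Theoretical Hessian of the quadratic risk for the rescaled design
H_theo <- diag((1:5)^2)
diag(H_theo)                 # eigenvalues 1, 4, 9, 16, 25
round(crossprod(X) / n, 1)   # empirical counterpart on the last simulated design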
df <- data.frame(
i = seq_len(nrow(err_mat_c1)),
SGD_c1 = rowMeans(err_mat_c1),
SGD_c01 = rowMeans(err_mat_c01),
SGD_c001 = rowMeans(err_mat_c001),
ASGD_c1 = rowMeans(errbar_mat_c1),
ASGD_c01 = rowMeans(errbar_mat_c01),
ASGD_c001 = rowMeans(errbar_mat_c001)
)
# Pivot to long format
df_long <- df %>%
pivot_longer(
cols = -i,
names_to = "method_c",
values_to = "error"
) %>%
separate(method_c, into = c("method", "c"), sep = "_c") %>%
mutate(
# map the name suffixes to the step constants actually used above
c = recode(c, "1" = "c = 0.3", "01" = "c = 0.1", "001" = "c = 0.01"),
method = factor(method, levels = c("SGD", "ASGD"))
)
ggplot(df_long, aes(x = i, y = error,
color = method, linetype = c)) +
geom_line(linewidth = 1) +
scale_y_log10() +
labs(
x = "n",
y = "Erreur quadratique moyenne",
color = "",
linetype = ""
) +
theme_minimal(base_size = 14)
1. Write an R function that returns the whole sequence of estimators \({\theta}_i\) obtained with the stochastic Newton algorithm: \[ {\theta}_{i+1} = {\theta}_{i} + \frac{i+1}{i+100}H_{i}^{-1}\left( Y_{i+1} - X_{i+1}^{T} {\theta}_{i} \right) X_{i+1} \] with \[ H_{i+1}^{-1} = H_{i}^{-1} - \left( 1+ X_{i+1}^{T}H_{i}^{-1}X_{i+1} \right)^{-1}H_{i}^{-1}X_{i+1}X_{i+1}^{T}H_{i}^{-1} . \] Note that \(i+100\) is used here instead of \(i+1\) for stability reasons at the start of the algorithm.
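The inverse recursion above is simply the Sherman–Morrison formula applied to the rank-one update \(H_{i+1} = H_{i} + X_{i+1} X_{i+1}^{T}\) (with \(H_{0} = I_{p}\) in the implementation below): for an invertible matrix \(A\) and a vector \(u\),
\[ \left( A + u u^{T} \right)^{-1} = A^{-1} - \frac{A^{-1} u u^{T} A^{-1}}{1 + u^{T} A^{-1} u}, \]
which is what allows \(H_{i}^{-1}\) to be maintained at cost \(O(p^{2})\) per observation without ever inverting a matrix.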
Newton_linear <- function(X, Y) {
  n <- nrow(X); p <- ncol(X)
  theta <- matrix(0, n+1, p)   # stochastic Newton iterates: row i+1 stores theta_i
  Hinv <- diag(p)              # running inverse Hessian estimate, H_0 = I_p
  for (i in 1:n) {
    # gradient of the quadratic loss at the current iterate
    grad <- -(Y[i] - sum(X[i,] * theta[i,])) * X[i,]
    # Newton-type step; the factor (i+1)/(i+100) damps the first iterations
    theta[i+1,] <- theta[i,] - (i+1)/(i+100) * Hinv %*% grad
    # Sherman-Morrison update of the inverse Hessian with the new observation
    u <- Hinv %*% X[i,]
    Hinv <- Hinv - (1 + sum(X[i,] * u))^(-1) * u %*% t(u)
  }
  theta
}
theta_newton <- Newton_linear(X,Y)
err <- rowSums((theta_newton - matrix(theta_true,n+1,p,byrow=TRUE))^2)
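As a quick sanity check (a sketch, not part of the original script), the last stochastic Newton iterate can be compared with the ordinary least squares estimator on the same data; for \(n = 10000\) both should essentially recover the true parameter.
# Compare the final stochastic Newton iterate, the OLS estimator and the truth
theta_ols <- solve(crossprod(X), crossprod(X, Y))
round(cbind(Newton = theta_newton[n+1, ], OLS = as.vector(theta_ols), truth = theta_true), 3)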
df <- data.frame(
i = seq_along(err),
err = err
)
ggplot(df, aes(x = i, y = err)) +
geom_line(color = "blue", linewidth = 1) +
scale_x_log10() +
scale_y_log10() +
labs(
x = "n",
y = "Erreur quadratique"
) +
theme_minimal()
# Monte Carlo comparison of SGD, ASGD and stochastic Newton on the rescaled design
B <- 50
err_newton <- matrix(0,n+1,B)
err_mat_c1 <- matrix(0,n+1,B)
err_mat_c01 <- matrix(0,n+1,B)
err_mat_c001 <- matrix(0,n+1,B)
errbar_mat_c1 <- matrix(0,n+1,B)
errbar_mat_c01 <- matrix(0,n+1,B)
errbar_mat_c001 <- matrix(0,n+1,B)
for(b in 1:B){
X <- matrix(rnorm(n*p), n, p) %*% diag(1:5)   # same rescaled design as above
Y <- X %*% theta_true + rnorm(n)
th    <- asgd_linear(X, Y, c = 0.3)
th001 <- asgd_linear(X, Y, c = 0.01)
th01  <- asgd_linear(X, Y, c = 0.1)
newt  <- Newton_linear(X, Y)
err_newton[,b] <- rowSums((newt - matrix(theta_true,n+1,p,byrow=TRUE))^2)
err_mat_c1[,b] <- rowSums((th$theta - matrix(theta_true,n+1,p,byrow=TRUE))^2)
err_mat_c01[,b] <- rowSums((th01$theta - matrix(theta_true,n+1,p,byrow=TRUE))^2)
err_mat_c001[,b] <- rowSums((th001$theta - matrix(theta_true,n+1,p,byrow=TRUE))^2)
errbar_mat_c1[,b] <- rowSums((th$thetabar - matrix(theta_true,n+1,p,byrow=TRUE))^2)
errbar_mat_c01[,b] <- rowSums((th01$thetabar - matrix(theta_true,n+1,p,byrow=TRUE))^2)
errbar_mat_c001[,b] <- rowSums((th001$thetabar - matrix(theta_true,n+1,p,byrow=TRUE))^2)
}
df <- data.frame(
i = seq_len(nrow(err_mat_c1)),
Newton = rowMeans(err_newton),
SGD_c1 = rowMeans(err_mat_c1),
SGD_c01 = rowMeans(err_mat_c01),
SGD_c001 = rowMeans(err_mat_c001),
ASGD_c1 = rowMeans(errbar_mat_c1),
ASGD_c01 = rowMeans(errbar_mat_c01),
ASGD_c001 = rowMeans(errbar_mat_c001)
)
# Pivot to long format
df_long <- df %>%
pivot_longer(
cols = -i,
names_to = "method_c",
values_to = "error"
) %>%
mutate(
method = case_when(
method_c == "Newton" ~ "Newton",
grepl("^SGD", method_c) ~ "SGD",
grepl("^ASGD", method_c) ~ "ASGD"
),
c = case_when(
method_c == "Newton" ~ "Newton",
grepl("c1$", method_c) ~ "c = 1",
grepl("c01$", method_c) ~ "c = 0.1",
grepl("c001$", method_c) ~ "c = 0.01"
)
) %>%
mutate(
method = factor(method, levels = c("Newton", "SGD", "ASGD")),
c = factor(c, levels = c("Newton", "c = 0.3", "c = 0.1", "c = 0.01"))
)
ggplot(df_long, aes(x = i, y = error,
color = method, linetype = c)) +
geom_line(linewidth = 1) +
scale_y_log10() +
scale_x_log10() +
scale_linetype_manual(
values = c(
"Newton" = "solid",
"c = 1" = "dashed",
"c = 0.1" = "dotdash",
"c = 0.01" = "dotted"
)
) +
labs(
x = "n",
y = "Erreur quadratique moyenne",
color = "",
linetype = ""
) +
theme_minimal(base_size = 14)