Bayesian Multiple Regression

5 minute read

We can replace normal multiple regression with Bayesian Multiple Regression.

Model and Data

First, we need to create simulation data. Let’s consider this model:

Generate data with R:

## Simulation settings
num_data <- 1000
N <- num_data

## Covariates: age and income
age    <- rnorm(num_data, mean = 50, sd = 15)
income <- rnorm(num_data, mean = 500, sd = 100)  # Normal is a rough choice, but fine for fake data
data <- data.frame(age = age, income = income)
covariates <- as.matrix(data[, c("age", "income")])


## Multiple Regression: true parameter values
true_beta0 <- 1.5
true_beta1 <- 2.8
true_beta2 <- -1.2
tau_true <- 1
betaX <- c(true_beta1, true_beta2)
# Y = beta0 + X %*% beta + Normal noise with precision tau_true
data$Y <- true_beta0 + covariates %*% betaX + rnorm(num_data, sd = 1 / sqrt(tau_true))

Likelihood function

The likelihood function is simple:

Update Equations

Update for $\beta_0$ (intercept)

Note that the likelihood and the prior $p(\beta_0)$ are Normal distributions, so we can rewrite them. Also, the log Normal PDF is important in the derivation.

Another important and useful property of the Normal distribution is that, when we focus on $\beta_0$,

Also, remember the prior $\beta_0 \sim N(\mu_0, 1/\tau_0)$. Now, let's resume (1).

By comparing the last equation with (2), we can say

# beta_0
# Draw one value of beta_0 (intercept) from its Normal full conditional.
#
# y          : response vector
# x1, x2     : covariate vectors
# beta_1, beta_2 : current values of the slope coefficients
# tau        : current value of the noise precision
# mu_0, tau_0    : prior mean and prior precision of beta_0
#
# Returns a single draw from N(mu, 1 / precision).
sample_beta_0 <- function(y, x1, x2, beta_1, beta_2, tau, mu_0, tau_0){
  n <- length(y)  # use the data length instead of the global N
  precision <- tau_0 + tau * n
  # Posterior mean: precision-weighted combination of prior mean and data signal
  mu <- tau_0 * mu_0 + tau * sum(y - (beta_1 * x1) - (beta_2 * x2))
  mu <- mu / precision
  # rnorm() takes sd, so convert precision -> sd
  value <- rnorm(1, mean=mu, sd=1/sqrt(precision))
  return(value)
}

Update for $\beta_1$ (coefficient)

The derivation is quite similar up to (1). We have a slightly different form for (3) because we now focus on $\beta_1$.

Now, we get

# beta_1
# One Gibbs draw for beta_1 from its Normal full conditional.
# mu_1 / tau_1 are the prior mean / precision of beta_1.
sample_beta_1 <- function(y, x1, x2, beta_0, beta_2, tau, mu_1, tau_1){
  post_prec <- tau_1 + tau * sum(x1*x1)
  # Residual with beta_1's own contribution removed
  partial_resid <- y - beta_0 - beta_2 * x2
  post_mean <- tau_1 * mu_1 + tau * sum(x1 * (partial_resid))
  post_mean <- post_mean / post_prec
  rnorm(1, mean=post_mean, sd=1/sqrt(post_prec))
}

We can derive a similar equation for $\beta_2$.

# beta_2
# One Gibbs draw for beta_2 from its Normal full conditional.
# mu_2 / tau_2 are the prior mean / precision of beta_2.
sample_beta_2 <- function(y, x1, x2, beta_0, beta_1, tau, mu_2, tau_2){
  post_prec <- tau_2 + tau * sum(x2*x2)
  # Residual with beta_2's own contribution removed
  partial_resid <- y - beta_0 - beta_1 * x1
  post_mean <- tau_2 * mu_2 + tau * sum(x2 * (partial_resid))
  post_mean <- post_mean / post_prec
  rnorm(1, mean=post_mean, sd=1/sqrt(post_prec))
}

Update for $\tau$ (precision)

Let’s recall log PDF of Gamma distribution:

We can get the update equation:

So, we get

# tau
# Draw the noise precision tau from its Gamma full conditional.
# alpha / beta are the shape and rate of the Gamma prior.
sample_tau <- function(y, x1, x2, beta_0, beta_1, beta_2, alpha, beta){
  n <- length(y)  # data size from the input, not the global N
  alpha_new <- alpha + n/2
  resid <- y - beta_0 - (beta_1 * x1) - (beta_2 * x2)
  beta_new <- beta + sum(resid*resid) / 2
  value <- rgamma(1, shape=alpha_new, rate=beta_new)
    # rgamma(): shape = alpha, rate = beta (scale would be 1/beta)
  return(value)  # original relied on the invisible value of the assignment; be explicit
}

Sampling

iter_num <- 15000

# Prepare storage vectors for the MCMC chains
beta_0 <- rep(NA, iter_num)
beta_1 <- rep(NA, iter_num)
beta_2 <- rep(NA, iter_num)
tau <- rep(NA, iter_num)

# Initialization
beta_0[1] <- 0
beta_1[1] <- 1
beta_2[1] <- 2
tau[1] <- 2
# Run: in a systematic-scan Gibbs sampler each conditional must condition
# on the MOST RECENT draws of the other parameters.
for(i in 1:(iter_num-1)){
  beta_0[i+1] <- sample_beta_0(data$Y, data$age, data$income, beta_1[i], beta_2[i], tau[i], 0, 0.01)
  beta_1[i+1] <- sample_beta_1(data$Y, data$age, data$income, beta_0[i+1], beta_2[i], tau[i], 0, 0.01)
  # Fix: pass the freshly drawn beta_1[i+1], not the stale beta_1[i]
  beta_2[i+1] <- sample_beta_2(data$Y, data$age, data$income, beta_0[i+1], beta_1[i+1], tau[i], 0, 0.01)
  tau[i+1] <- sample_tau(data$Y, data$age, data$income, beta_0[i+1], beta_1[i+1], beta_2[i+1], alpha=2, beta=1)
}

Result

Code

# Set working directory
# NOTE(review): hard-coded absolute path; setwd() inside a script is fragile.
# Prefer running from the project root (or a path-building helper) — adjust before reuse.
setwd("/Users/Study/Analysis/")

# Load library
library(tidyverse)
###############################################
# Bayesian Linear Regression (Gibbs Sampling) #
###############################################
iter_num <- 15000      # total Gibbs iterations
burn_in_num <- 12000   # leading iterations discarded when plotting

####################
# Create fake data #
####################
## Simulation settings
num_data <- 1000
N <- num_data

## Covariates: age and income
age    <- rnorm(num_data, mean = 50, sd = 15)
income <- rnorm(num_data, mean = 500, sd = 100)
data <- data.frame(age = age, income = income)
covariates <- as.matrix(data[, c("age", "income")])


## Multiple Regression: true parameter values
true_beta0 <- 1.5
true_beta1 <- 2.8
true_beta2 <- -1.2
tau_true <- 1
betaX <- c(true_beta1, true_beta2)
# Y = beta0 + X %*% beta + Normal noise with precision tau_true
data$Y <- true_beta0 + covariates %*% betaX + rnorm(num_data, sd = 1 / sqrt(tau_true))


## Sanity check against ordinary least squares
res_lm <- lm(Y ~ age + income, data)
summary(res_lm)

#############
# Inference #
#############
# Draw one value of beta_0 (intercept) from its Normal full conditional.
# mu_0 / tau_0 are the prior mean and prior precision of beta_0.
# Returns a single draw from N(mu, 1 / precision).
sample_beta_0 <- function(y, x1, x2, beta_1, beta_2, tau, mu_0, tau_0){
  n <- length(y)  # use the data length instead of the global N
  precision <- tau_0 + tau * n
  # Posterior mean: precision-weighted combination of prior mean and data signal
  mu <- tau_0 * mu_0 + tau * sum(y - (beta_1 * x1) - (beta_2 * x2))
  mu <- mu / precision
  value <- rnorm(1, mean=mu, sd=1/sqrt(precision))
  return(value)
}


# One Gibbs draw for beta_1 from its Normal full conditional.
# mu_1 / tau_1 are the prior mean / precision of beta_1.
sample_beta_1 <- function(y, x1, x2, beta_0, beta_2, tau, mu_1, tau_1){
  post_prec <- tau_1 + tau * sum(x1*x1)
  # Residual with beta_1's own contribution removed
  partial_resid <- y - beta_0 - beta_2 * x2
  post_mean <- tau_1 * mu_1 + tau * sum(x1 * (partial_resid))
  post_mean <- post_mean / post_prec
  rnorm(1, mean=post_mean, sd=1/sqrt(post_prec))
}

# One Gibbs draw for beta_2 from its Normal full conditional.
# mu_2 / tau_2 are the prior mean / precision of beta_2.
sample_beta_2 <- function(y, x1, x2, beta_0, beta_1, tau, mu_2, tau_2){
  post_prec <- tau_2 + tau * sum(x2*x2)
  # Residual with beta_2's own contribution removed
  partial_resid <- y - beta_0 - beta_1 * x1
  post_mean <- tau_2 * mu_2 + tau * sum(x2 * (partial_resid))
  post_mean <- post_mean / post_prec
  rnorm(1, mean=post_mean, sd=1/sqrt(post_prec))
}

# Draw the noise precision tau from its Gamma full conditional.
# alpha / beta are the shape and rate of the Gamma prior.
sample_tau <- function(y, x1, x2, beta_0, beta_1, beta_2, alpha, beta){
  n <- length(y)  # data size from the input, not the global N
  alpha_new <- alpha + n/2
  resid <- y - beta_0 - (beta_1 * x1) - (beta_2 * x2)
  beta_new <- beta + sum(resid*resid) / 2
  value <- rgamma(1, shape=alpha_new, rate=beta_new)
    # rgamma(): shape = alpha, rate = beta (scale would be 1/beta)
  return(value)  # original relied on the invisible value of the assignment; be explicit
}

# Prepare storage vectors for the MCMC chains
beta_0 <- rep(NA, iter_num)
beta_1 <- rep(NA, iter_num)
beta_2 <- rep(NA, iter_num)
tau <- rep(NA, iter_num)

# Initialization
beta_0[1] <- 0
beta_1[1] <- 1
beta_2[1] <- 2
tau[1] <- 2
# Run: in a systematic-scan Gibbs sampler each conditional must condition
# on the MOST RECENT draws of the other parameters.
for(i in 1:(iter_num-1)){
  beta_0[i+1] <- sample_beta_0(data$Y, data$age, data$income, beta_1[i], beta_2[i], tau[i], 0, 0.01)
  beta_1[i+1] <- sample_beta_1(data$Y, data$age, data$income, beta_0[i+1], beta_2[i], tau[i], 0, 0.01)
  # Fix: pass the freshly drawn beta_1[i+1], not the stale beta_1[i]
  beta_2[i+1] <- sample_beta_2(data$Y, data$age, data$income, beta_0[i+1], beta_1[i+1], tau[i], 0, 0.01)
  tau[i+1] <- sample_tau(data$Y, data$age, data$income, beta_0[i+1], beta_1[i+1], beta_2[i+1], alpha=2, beta=1)
}


# Make Figure
make_figure <- function(params, true_values, params_names, burn_in=2000, slice=5){
  num_params <- length(params)

  tidy_params <- list()
  for(i in 1:num_params){
    temp <- data.frame(value=params[[i]])
    temp$parameter <- params_names[i]
    temp$iter <- 1:nrow(temp)

    temp <- temp[burn_in:nrow(temp) ,]
    slice_index <- seq(1, nrow(temp), slice)# slice data
    temp <- temp[slice_index, ]
    tidy_params[[i]] <- temp
  }
  tidy_params <- do.call(rbind.data.frame, tidy_params)

  true <- data.frame(
    parameter = params_names,
    value = true_values
    )

  param <- ggplot(data=tidy_params, aes(x=iter, y=value, group=parameter, color=parameter)) + 
      geom_line() + 
      geom_point(size=0.3) +
      facet_wrap(~parameter, ncol=2, scales = "free") +
      geom_hline(data = true, aes(yintercept = value), size=0.7) +
      theme_bw() +
      theme(legend.position="none")

  param_density <- ggplot(data=tidy_params, aes(x=value, color=parameter, fill=parameter)) +
    geom_density(stat = "density", position = "identity",alpha=0.6) + 
    facet_wrap(~parameter, ncol=2, scales = "free") +
    geom_vline(data = true, aes(xintercept = value), size=0.7) +
    theme_bw() +
    theme(legend.position="none")

  return(list(param, param_density))

}

# Collect the chains and draw trace / density figures
params <- list(beta_0, beta_1, beta_2, tau)
true_values <- c(true_beta0, true_beta1, true_beta2, tau_true)
params_names <- c("beta_0", "beta_1", "beta_2", "tau")
gs_res3 <- make_figure(params, true_values, params_names, burn_in = burn_in_num, slice = 5)
gs_res3[[1]]  # trace plots
gs_res3[[2]]  # posterior densities
# Persist the figure objects and the simulated data, then export PNGs
saveRDS(gs_res3, "figure.obj")
saveRDS(data, "data.obj")
ggsave("trace.png", gs_res3[[1]], width = 7, height = 5)
ggsave("hist.png", gs_res3[[2]], width = 7, height = 6)

Updated: