Basic Introduction to OOS

This introduction demonstrates how one may conduct a basic forecasting exercise with OOS. For more detailed examples and documentation, please see the OOS website.

0. Environment

library(OOS)

1. Data

# Download the three FRED series used in this example and load them into the
# global environment under their symbol names (UNRATE, INDPRO, GS10).
quantmod::getSymbols.FRED(
  Symbols = c('UNRATE', 'INDPRO', 'GS10'),
  env = .GlobalEnv
)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
#> [1] "UNRATE" "INDPRO" "GS10"
# Column-bind the three series, attach their shared index as a `date` column,
# keep observations from 1990 onward, and drop rows with any missing value.
Data = cbind(UNRATE, INDPRO, GS10)
Data = data.frame(Data, date = zoo::index(Data))
Data = dplyr::filter(Data, lubridate::year(date) >= 1990)
Data = na.omit(Data)

# make industrial production and 10-year Treasury stationary:
#   GS10   -> first difference (change from the previous observation)
#   INDPRO -> growth rate relative to the value 12 observations earlier
# dplyr::lag() is namespaced on every call: a bare lag() resolves to
# stats::lag() when dplyr is not attached, which shifts only the time-series
# attributes and leaves the values in place, so the growth rate would be 0.
Data = Data %>%
  dplyr::mutate(
    GS10 = GS10 - dplyr::lag(GS10),
    INDPRO = (INDPRO - dplyr::lag(INDPRO, 12)) / dplyr::lag(INDPRO, 12))

# Trim the sample to start on 1954-01-01, the date from which all three series
# are meant to be available (optional; intended only to avoid warnings).
# NOTE(review): the sample was already restricted to 1990 onward earlier in
# this script, so this filter looks like a no-op -- confirm it is still wanted.
Data = Data %>%
  dplyr::filter(date >= as.Date('1954-01-01'))

2. Forecasting

# Univariate forecasts of UNRATE: a naive model, horizon of 1 at monthly
# frequency, produced for the last five dates in the sample.
# ('auto.arima' and 'ets' are further methods that could be listed here.)
forecast.uni = forecast_univariate(
  Data = Data %>% dplyr::select(date, UNRATE),
  forecast.dates = tail(Data$date, n = 5),
  method = 'naive',
  horizon = 1,
  recursive = FALSE,
  rolling.window = NA,
  freq = 'month'
)
#> forecast_univariate.control_panel was instantiated and default values will be used for model estimation.

# Multivariate forecasts of UNRATE using every column of Data, estimated with
# a lasso model for the last five dates in the sample (horizon of 1, monthly).
forecast.multi = forecast_multivariate(
  Data = Data,
  forecast.date = tail(Data$date, n = 5),
  target = 'UNRATE',
  method = 'lasso',
  horizon = 1,
  rolling.window = NA,
  freq = 'month'
)
#> forecast_multivariate.ml.control_panel was instantiated and default values will be used for ML model estimation.
#> forecast_multivariate.var.control_panel was instantiated and default values will be used for VAR model estimation.
#> Loading required package: lattice
#> Loading required package: ggplot2
#> 
#> Attaching package: 'caret'
#> The following object is masked from 'package:purrr':
#> 
#>     lift

# Stack the univariate and multivariate forecasts into one table, then attach
# the realised unemployment rate as an `observed` column, matched on `date`.
forecasts = dplyr::left_join(
  dplyr::bind_rows(forecast.uni, forecast.multi),
  dplyr::select(Data, date, observed = UNRATE),
  by = 'date'
)

# Build forecast combinations from the individual model forecasts using three
# combination methods: 'uniform', 'median', and 'trimmed.mean'.
forecast.combo = forecasts %>%
  forecast_combine(method = c('uniform', 'median', 'trimmed.mean'))

3. Forecast Analysis

# Append the combination forecasts to the table of individual forecasts so
# that every method is evaluated together.
forecasts = dplyr::bind_rows(forecasts, forecast.combo)

# Compute accuracy statistics for each forecasting method.
forecast.error = forecast_accuracy(forecasts)

# view forecast errors from least to greatest MSE
#   (best forecast to worst forecast method),
#   rounding every column except `model` to three decimals.
# across() is used with explicit dplyr:: prefixes so the snippet does not
# depend on dplyr being attached (the bare vars() helper did).
forecast.error %>%
  dplyr::mutate(dplyr::across(-model, ~ round(.x, 3))) %>%
  dplyr::arrange(MSE)
#> # A tibble: 5 x 5
#>   model                MSE  RMSE   MAE  MAPE
#>   <chr>              <dbl> <dbl> <dbl> <dbl>
#> 1 median.combo       0.06  0.246 0.185 0.028
#> 2 trimmed.mean.combo 0.06  0.246 0.185 0.028
#> 3 uniform.combo      0.06  0.246 0.185 0.028
#> 4 naive              0.252 0.502 0.375 0.056
#> 5 lasso              0.589 0.768 0.735 0.11

# Benchmark every method against the naive forecast (a random walk) using the
# error ratio ('ER') of MSE losses, listed from the smallest ratio to the
# largest; the baseline itself has a ratio of 1.
dplyr::arrange(
  forecast_comparison(
    forecasts,
    baseline.forecast = 'naive',
    test = 'ER',
    loss = 'MSE'
  ),
  error.ratio
)
#>                model error.ratio
#> 1       median.combo   0.2394147
#> 2 trimmed.mean.combo   0.2394147
#> 3      uniform.combo   0.2394147
#> 4              naive   1.0000000
#> 5              lasso   2.3345662

# chart forecasts
# The unemployment rate (UNRATE) is measured in percent, so the y-axis is
# labelled 'Percent' rather than 'Index'.
chart =
  chart_forecast(
    forecasts,
    Title = 'US Unemployment Rate',
    Ylab = 'Percent',
    Freq = 'Monthly')