This introduction demonstrates how one may conduct a basic forecasting exercise with OOS. For more detailed examples and documentation, please see the OOS website.
library(OOS)
# pull and prepare data from FRED
# `env = globalenv()` makes the downloaded series available as the global
# objects UNRATE, INDPRO and GS10, which the `cbind()` call below relies on.
quantmod::getSymbols.FRED(
  c('UNRATE', 'INDPRO', 'GS10'),
  env = globalenv())
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
#> [1] "UNRATE" "INDPRO" "GS10"
# Bind the three series column-wise, then convert to a data frame that
# carries the time index as an explicit `date` column.
Data = cbind(UNRATE, INDPRO, GS10)
Data = data.frame(Data, date = zoo::index(Data)) %>%
  dplyr::filter(lubridate::year(date) >= 1990) %>%  # keep 1990 onward
  na.omit()                                         # drop rows with any NA
# make industrial production and 10-year Treasury stationary
# `lag()` is always qualified as `dplyr::lag()`: if dplyr is not attached, a
# bare `lag()` resolves to `stats::lag()`, which only changes the time-series
# attribute of a plain vector and does not shift its values, so the growth
# rate would silently evaluate to zero.
Data = Data %>%
  dplyr::mutate(
    # first difference of the 10-year Treasury series
    GS10 = GS10 - dplyr::lag(GS10),
    # 12-period percent change of industrial production
    INDPRO = (INDPRO - dplyr::lag(INDPRO, 12)) / dplyr::lag(INDPRO, 12))
# start data when all three variables are available
# (this is not necessary, but it will suppress warnings for us)
# NOTE(review): Data is already restricted to 1990 onward earlier in this
# script, so this filter looks like a no-op here -- confirm before removing.
Data = dplyr::filter(Data, date >= as.Date('1954-01-01'))
# run univariate forecasts
# Only the date and the target series are passed in; forecasts are produced
# for the last five dates in Data, one step ahead.
forecast.uni =
  forecast_univariate(
    Data = dplyr::select(Data, date, UNRATE),
    forecast.dates = tail(Data$date, 5),
    method = c('naive'),  # other methods left disabled: 'auto.arima', 'ets'
    horizon = 1,
    recursive = FALSE,
    rolling.window = NA,
    freq = 'month')
#> forecast_univariate.control_panel was instantiated and default values will be used for model estimation.
# create multivariate forecasts
# Uses every column of Data, targets UNRATE, and forecasts the same last five
# dates at the same one-step horizon as the univariate run.
forecast.multi =
  forecast_multivariate(
    Data = Data,
    forecast.date = tail(Data$date, 5),
    target = 'UNRATE',
    horizon = 1,
    method = c('lasso'),
    rolling.window = NA,
    freq = 'month')
#> forecast_multivariate.ml.control_panel was instantiated and default values will be used for ML model estimation.
#> forecast_multivariate.var.control_panel was instantiated and default values will be used for VAR model estimation.
#> Loading required package: lattice
#> Loading required package: ggplot2
#>
#> Attaching package: 'caret'
#> The following object is masked from 'package:purrr':
#>
#> lift
# combine forecasts and add in observed values
# Stack the univariate and multivariate forecasts, then attach the realized
# UNRATE (renamed `observed`) by matching on `date`.
forecasts =
  dplyr::bind_rows(
    forecast.uni,
    forecast.multi) %>%
  dplyr::left_join(
    dplyr::select(Data, date, observed = UNRATE),
    by = 'date')
# forecast combinations
# Build one combined forecast per listed combination method.
forecast.combo =
  forecast_combine(
    forecasts,
    method = c('uniform', 'median', 'trimmed.mean'))
# merge forecast combinations back into forecasts
# so the combinations are evaluated alongside the individual models
forecasts =
  forecasts %>%
  dplyr::bind_rows(forecast.combo)
# calculate forecast errors
forecast.error = forecast_accuracy(forecasts)
# view forecast errors from least to greatest
# (best forecast to worst forecast method)
# Every column except `model` is rounded to 3 decimals for display only;
# `forecast.error` itself is not modified. `vars()` is qualified so the call
# does not depend on dplyr being attached.
forecast.error %>%
  dplyr::mutate_at(dplyr::vars(-model), round, 3) %>%
  dplyr::arrange(MSE)
#> # A tibble: 5 x 5
#>   model                MSE  RMSE   MAE  MAPE
#>   <chr>              <dbl> <dbl> <dbl> <dbl>
#> 1 median.combo       0.06  0.246 0.185 0.028
#> 2 trimmed.mean.combo 0.06  0.246 0.185 0.028
#> 3 uniform.combo      0.06  0.246 0.185 0.028
#> 4 naive              0.252 0.502 0.375 0.056
#> 5 lasso              0.589 0.768 0.735 0.11
# compare forecasts to the baseline (a random walk)
# The 'naive' forecast is the baseline, so its own error ratio is 1 in the
# output below; results are sorted ascending by error ratio.
forecast_comparison(
  forecasts,
  baseline.forecast = 'naive',
  test = 'ER',
  loss = 'MSE') %>%
  dplyr::arrange(error.ratio)
#>                model error.ratio
#> 1       median.combo   0.2394147
#> 2 trimmed.mean.combo   0.2394147
#> 3      uniform.combo   0.2394147
#> 4              naive   1.0000000
#> 5              lasso   2.3345662
# chart forecasts
# NOTE(review): the y-axis label is 'Index', but the charted series is the
# unemployment rate -- confirm whether a percent label was intended.
chart =
  chart_forecast(
    forecasts,
    Title = 'US Unemployment Rate',
    Ylab = 'Index',
    Freq = 'Monthly')