/*
* QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
* Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using MathNet.Numerics;
using MathNet.Numerics.LinearAlgebra.Double;
using MathNet.Numerics.LinearRegression;
namespace QuantConnect.Indicators
{
///
/// An Autoregressive Intergrated Moving Average (ARIMA) is a time series model which can be used to describe a set of data.
/// In particular,with Xₜ representing the series, the model assumes the data are of form
/// (after differencing times):
///
/// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
///
/// where the first sum has an upper limit of and the second .
///
public class AutoRegressiveIntegratedMovingAverage : TimeSeriesIndicator
{
private List _residuals;
private readonly bool _intercept;
private bool _loggedOnceInMovingAverageStep;
private bool _loggedOnceInAutoRegressiveStep;
private readonly RollingWindow _rollingData;
///
/// Differencing coefficient (d). Determines how many times the series should be differenced before fitting the
/// model.
///
private readonly int _diffOrder;
///
/// AR coefficient -- p
///
private readonly int _arOrder;
///
/// MA Coefficient -- q
///
private readonly int _maOrder;
///
/// Whether or not to handle potential exceptions, returning a zero value. I.e, the values
/// provided as input are not valid by the Normal Equations direct regression method
///
public bool HandleExceptions { get; set; } = true;
///
/// Fitted AR parameters (φ terms).
///
public double[] ArParameters { get; private set; }
///
/// Fitted MA parameters (θ terms).
///
public double[] MaParameters { get; private set; }
///
/// Fitted intercept (c term).
///
public double Intercept { get; private set; }
///
/// Gets a flag indicating when this indicator is ready and fully initialized
///
public override bool IsReady => _rollingData.IsReady;
///
/// Required period, in data points, for the indicator to be ready and fully initialized.
///
public override int WarmUpPeriod { get; }
///
/// The variance of the residuals (Var(ε)) from the first step of .
///
public double ArResidualError { get; private set; }
///
/// The variance of the residuals (Var(ε)) from the second step of .
///
public double MaResidualError { get; private set; }
///
/// Fits an ARIMA(arOrder,diffOrder,maOrder) model of form (after differencing it times):
///
/// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
///
/// where the first sum has an upper limit of and the second .
/// This particular constructor fits the model by means of for a specified name.
///
/// The name of the indicator
/// AR order (p) -- defines the number of past values to consider in the AR component of the model.
/// Difference order (d) -- defines how many times to difference the model before fitting parameters.
/// MA order -- defines the number of past values to consider in the MA component of the model.
/// Size of the rolling series to fit onto
/// Whether or not to include the intercept term
public AutoRegressiveIntegratedMovingAverage(
string name,
int arOrder,
int diffOrder,
int maOrder,
int period,
bool intercept = true
)
: base(name)
{
if (arOrder < 0 || maOrder < 0)
{
throw new ArgumentException("AR/MA orders cannot be negative.");
}
if (arOrder == 0)
{
throw new ArgumentException("arOrder (p) must be greater than zero for all " +
"currently available fitting methods.");
}
if (period < Math.Max(arOrder, maOrder))
{
throw new ArgumentException("Period must exceed both arOrder and maOrder");
}
_arOrder = arOrder;
_maOrder = maOrder;
_diffOrder = diffOrder;
WarmUpPeriod = period;
_rollingData = new RollingWindow(period);
_intercept = intercept;
}
///
/// Fits an ARIMA(arOrder,diffOrder,maOrder) model of form (after differencing it times):
///
/// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ
///
/// where the first sum has an upper limit of and the second .
/// This particular constructor fits the model by means of using ordinary least squares.
///
/// AR order (p) -- defines the number of past values to consider in the AR component of the model.
/// Difference order (d) -- defines how many times to difference the model before fitting parameters.
/// MA order (q) -- defines the number of past values to consider in the MA component of the model.
/// Size of the rolling series to fit onto
/// Whether to include an intercept term (c)
public AutoRegressiveIntegratedMovingAverage(
int arOrder,
int diffOrder,
int maOrder,
int period,
bool intercept
)
: this($"ARIMA(({arOrder}, {diffOrder}, {maOrder}), {period}, {intercept})", arOrder, diffOrder, maOrder,
period, intercept)
{
}
///
/// Resets this indicator to its initial state
///
public override void Reset()
{
base.Reset();
_rollingData.Reset();
}
///
/// Forecasts the series of the fitted model one point ahead.
///
/// The input given to the indicator
/// A new value for this indicator
protected override decimal ComputeNextValue(IndicatorDataPoint input)
{
_rollingData.Add((double)input.Value);
if (_rollingData.IsReady)
{
var arrayData = _rollingData.ToArray();
double[] diffHeads = default;
arrayData = _diffOrder > 0 ? DifferenceSeries(_diffOrder, arrayData, out diffHeads) : arrayData;
_diffHeads = diffHeads;
TwoStepFit(arrayData);
double summants = 0;
if (_arOrder > 0)
{
for (var i = 0; i < _arOrder; i++) // AR Parameters
{
summants += ArParameters[i] * arrayData[i];
}
}
if (_maOrder > 0)
{
for (var i = 0; i < _maOrder; i++) // MA Parameters
{
summants += MaParameters[i] * _residuals[_maOrder + i + 1];
}
}
summants += Intercept; // By default equals 0
if (_diffOrder > 0)
{
var dataCast = arrayData.ToList();
dataCast.Insert(0, summants); // Prepends
summants = InverseDifferencedSeries(dataCast.ToArray(), _diffHeads).First(); // Returns disintegrated series
}
return (decimal)summants;
}
return 0m;
}
///
/// Fits the model by means of implementing the following pseudo-code algorithm (in the form of "if{then}"):
///
/// if diffOrder > 0 {Difference data diffOrder times}
/// if arOrder > 0 {Fit the AR model Xₜ = ΣᵢφᵢXₜ; ε's are set to residuals from fitting this.}
/// if maOrder > 0 {Fit the MA parameters left over Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ}
/// Return: φ and θ estimates.
///
/// http://mbhauser.com/informal-notes/two-step-arma-estimation.pdf
///
private void TwoStepFit(double[] series) // Protected for any future inheritors (e.g., SARIMA)
{
_residuals = new List();
double errorAr = 0;
double errorMa = 0;
var lags = _arOrder > 0 ? LaggedSeries(_arOrder, series) : new[] {series};
AutoRegressiveStep(lags, series, errorAr);
if (_maOrder <= 0)
{
return;
}
MovingAverageStep(lags, series, errorMa);
}
///
/// Fits the moving average component in the method.
///
/// An array of lagged data ().
/// The input series, differenced times.
/// The summed residuals (by default 0) associated with the MA component.
private void MovingAverageStep(double[][] lags, double[] data, double errorMa)
{
var appendedData = new List();
var laggedErrors = LaggedSeries(_maOrder, _residuals.ToArray());
for (var i = 0; i < laggedErrors.Length; i++)
{
var doubles = lags[i].ToList();
doubles.AddRange(laggedErrors[i]);
appendedData.Add(doubles.ToArray());
}
double[] maFits = default;
if (HandleExceptions)
{
try
{
maFits = Fit.MultiDim(appendedData.ToArray(), data.Skip(_maOrder).ToArray(),
method: DirectRegressionMethod.NormalEquations, intercept: _intercept);
}
catch (Exception ex)
{
if (!_loggedOnceInMovingAverageStep)
{
Logging.Log.Error($"AutoRegressiveIntegratedMovingAverage.MovingAverageStep(): {ex.Message}");
_loggedOnceInMovingAverageStep = true;
}
// The method Fit.MultiDim takes the appendedData array of mxn(m rows, n columns), computes its
// transpose of size nxm, and then multiplies the tranpose with the original matrix, so the
// resultant matrix is of size nxn. Then a linear system Ax=b is solved where A is the
// aforementioned matrix and b is the data. Thus, the size of the response x is n
//
// It's worth saying that if intercept flag is set to true, the number of columns of the initial
// matrix (appendedData) is increased in one. For more information, please see the implementation
// of Fit.MultiDim() method (Ctrl + right click)
var size = appendedData.ToArray()[0].Length + (_intercept ? 1 : 0);
maFits = new double[size];
}
}
else
{
maFits = Fit.MultiDim(appendedData.ToArray(), data.Skip(_maOrder).ToArray(),
method: DirectRegressionMethod.NormalEquations, intercept: _intercept);
}
for (var i = _maOrder; i < data.Length; i++) // Calculate the error assoc. with model.
{
var paramVector = _intercept
? Vector.Build.Dense(maFits.Skip(1).ToArray())
: Vector.Build.Dense(maFits);
var residual = data[i] - Vector.Build.Dense(appendedData[i - _maOrder]).DotProduct(paramVector);
errorMa += Math.Pow(residual, 2);
}
switch (_intercept)
{
case true:
MaResidualError = errorMa / (data.Length - Math.Max(_arOrder, _maOrder) - 1);
MaParameters = maFits.Skip(1 + _arOrder).ToArray();
ArParameters = maFits.Skip(1).Take(_arOrder).ToArray();
Intercept = maFits[0];
break;
default:
MaResidualError = errorMa / (data.Length - Math.Max(_arOrder, _maOrder) - 1);
MaParameters = maFits.Skip(_arOrder).ToArray();
ArParameters = maFits.Take(_arOrder).ToArray();
break;
}
}
///
/// Fits the autoregressive component in the method.
///
/// An array of lagged data ().
/// The input series, differenced times.
/// The summed residuals (by default 0) associated with the AR component.
private void AutoRegressiveStep(double[][] lags, double[] data, double errorAr)
{
double[] arFits;
if (HandleExceptions)
{
try
{
// The function (lags[time][lagged X]) |---> ΣᵢφᵢXₜ₋ᵢ
arFits = Fit.MultiDim(lags, data.Skip(_arOrder).ToArray(),
method: DirectRegressionMethod.NormalEquations);
}
catch (Exception ex)
{
if (!_loggedOnceInAutoRegressiveStep)
{
Logging.Log.Error($"AutoRegressiveIntegratedMovingAverage.AutoRegressiveStep(): {ex.Message}");
_loggedOnceInAutoRegressiveStep = true;
}
// The method Fit.MultiDim takes the lags array of mxn(m rows, n columns), computes its
// transpose of size nxm, and then multiplies the tranpose with the original matrix, so the
// resultant matrix is of size nxn. Then a linear system Ax=b is solved where A is the
// aforementioned matrix and b is the data. Thus, the size of the response x is n
//
// For more information, please see the implementation of Fit.MultiDim() method (Ctrl + right click)
var size = lags.ToArray()[0].Length;
arFits = new double[size];
}
}
else
{
// The function (lags[time][lagged X]) |---> ΣᵢφᵢXₜ₋ᵢ
arFits = Fit.MultiDim(lags, data.Skip(_arOrder).ToArray(),
method: DirectRegressionMethod.NormalEquations);
}
var fittedVec = Vector.Build.Dense(arFits);
for (var i = 0; i < data.Length; i++) // Calculate the error assoc. with model.
{
if (i < _arOrder)
{
_residuals.Add(0); // 0-padding
continue;
}
var residual = data[i] - Vector.Build.Dense(lags[i - _arOrder]).DotProduct(fittedVec);
errorAr += Math.Pow(residual, 2);
_residuals.Add(residual);
}
ArResidualError = errorAr / (data.Length - _arOrder - 1);
if (_maOrder == 0)
{
ArParameters = arFits; // Will not be thrown out
}
}
}
}