/* * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals. * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Collections.Generic; using System.Linq; using MathNet.Numerics; using MathNet.Numerics.LinearAlgebra.Double; using MathNet.Numerics.LinearRegression; namespace QuantConnect.Indicators { /// /// An Autoregressive Intergrated Moving Average (ARIMA) is a time series model which can be used to describe a set of data. /// In particular,with Xₜ representing the series, the model assumes the data are of form /// (after differencing times): /// /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ /// /// where the first sum has an upper limit of and the second . /// public class AutoRegressiveIntegratedMovingAverage : TimeSeriesIndicator { private List _residuals; private readonly bool _intercept; private bool _loggedOnceInMovingAverageStep; private bool _loggedOnceInAutoRegressiveStep; private readonly RollingWindow _rollingData; /// /// Differencing coefficient (d). Determines how many times the series should be differenced before fitting the /// model. /// private readonly int _diffOrder; /// /// AR coefficient -- p /// private readonly int _arOrder; /// /// MA Coefficient -- q /// private readonly int _maOrder; /// /// Whether or not to handle potential exceptions, returning a zero value. I.e, the values /// provided as input are not valid by the Normal Equations direct regression method /// public bool HandleExceptions { get; set; } = true; /// /// Fitted AR parameters (φ terms). /// public double[] ArParameters { get; private set; } /// /// Fitted MA parameters (θ terms). /// public double[] MaParameters { get; private set; } /// /// Fitted intercept (c term). /// public double Intercept { get; private set; } /// /// Gets a flag indicating when this indicator is ready and fully initialized /// public override bool IsReady => _rollingData.IsReady; /// /// Required period, in data points, for the indicator to be ready and fully initialized. /// public override int WarmUpPeriod { get; } /// /// The variance of the residuals (Var(ε)) from the first step of . /// public double ArResidualError { get; private set; } /// /// The variance of the residuals (Var(ε)) from the second step of . /// public double MaResidualError { get; private set; } /// /// Fits an ARIMA(arOrder,diffOrder,maOrder) model of form (after differencing it times): /// /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ /// /// where the first sum has an upper limit of and the second . /// This particular constructor fits the model by means of for a specified name. /// /// The name of the indicator /// AR order (p) -- defines the number of past values to consider in the AR component of the model. /// Difference order (d) -- defines how many times to difference the model before fitting parameters. /// MA order -- defines the number of past values to consider in the MA component of the model. /// Size of the rolling series to fit onto /// Whether or not to include the intercept term public AutoRegressiveIntegratedMovingAverage( string name, int arOrder, int diffOrder, int maOrder, int period, bool intercept = true ) : base(name) { if (arOrder < 0 || maOrder < 0) { throw new ArgumentException("AR/MA orders cannot be negative."); } if (arOrder == 0) { throw new ArgumentException("arOrder (p) must be greater than zero for all " + "currently available fitting methods."); } if (period < Math.Max(arOrder, maOrder)) { throw new ArgumentException("Period must exceed both arOrder and maOrder"); } _arOrder = arOrder; _maOrder = maOrder; _diffOrder = diffOrder; WarmUpPeriod = period; _rollingData = new RollingWindow(period); _intercept = intercept; } /// /// Fits an ARIMA(arOrder,diffOrder,maOrder) model of form (after differencing it times): /// /// Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ /// /// where the first sum has an upper limit of and the second . /// This particular constructor fits the model by means of using ordinary least squares. /// /// AR order (p) -- defines the number of past values to consider in the AR component of the model. /// Difference order (d) -- defines how many times to difference the model before fitting parameters. /// MA order (q) -- defines the number of past values to consider in the MA component of the model. /// Size of the rolling series to fit onto /// Whether to include an intercept term (c) public AutoRegressiveIntegratedMovingAverage( int arOrder, int diffOrder, int maOrder, int period, bool intercept ) : this($"ARIMA(({arOrder}, {diffOrder}, {maOrder}), {period}, {intercept})", arOrder, diffOrder, maOrder, period, intercept) { } /// /// Resets this indicator to its initial state /// public override void Reset() { base.Reset(); _rollingData.Reset(); } /// /// Forecasts the series of the fitted model one point ahead. /// /// The input given to the indicator /// A new value for this indicator protected override decimal ComputeNextValue(IndicatorDataPoint input) { _rollingData.Add((double)input.Value); if (_rollingData.IsReady) { var arrayData = _rollingData.ToArray(); double[] diffHeads = default; arrayData = _diffOrder > 0 ? DifferenceSeries(_diffOrder, arrayData, out diffHeads) : arrayData; _diffHeads = diffHeads; TwoStepFit(arrayData); double summants = 0; if (_arOrder > 0) { for (var i = 0; i < _arOrder; i++) // AR Parameters { summants += ArParameters[i] * arrayData[i]; } } if (_maOrder > 0) { for (var i = 0; i < _maOrder; i++) // MA Parameters { summants += MaParameters[i] * _residuals[_maOrder + i + 1]; } } summants += Intercept; // By default equals 0 if (_diffOrder > 0) { var dataCast = arrayData.ToList(); dataCast.Insert(0, summants); // Prepends summants = InverseDifferencedSeries(dataCast.ToArray(), _diffHeads).First(); // Returns disintegrated series } return (decimal)summants; } return 0m; } /// /// Fits the model by means of implementing the following pseudo-code algorithm (in the form of "if{then}"): /// /// if diffOrder > 0 {Difference data diffOrder times} /// if arOrder > 0 {Fit the AR model Xₜ = ΣᵢφᵢXₜ; ε's are set to residuals from fitting this.} /// if maOrder > 0 {Fit the MA parameters left over Xₜ = c + εₜ + ΣᵢφᵢXₜ₋ᵢ + Σᵢθᵢεₜ₋ᵢ} /// Return: φ and θ estimates. /// /// http://mbhauser.com/informal-notes/two-step-arma-estimation.pdf /// private void TwoStepFit(double[] series) // Protected for any future inheritors (e.g., SARIMA) { _residuals = new List(); double errorAr = 0; double errorMa = 0; var lags = _arOrder > 0 ? LaggedSeries(_arOrder, series) : new[] {series}; AutoRegressiveStep(lags, series, errorAr); if (_maOrder <= 0) { return; } MovingAverageStep(lags, series, errorMa); } /// /// Fits the moving average component in the method. /// /// An array of lagged data (). /// The input series, differenced times. /// The summed residuals (by default 0) associated with the MA component. private void MovingAverageStep(double[][] lags, double[] data, double errorMa) { var appendedData = new List(); var laggedErrors = LaggedSeries(_maOrder, _residuals.ToArray()); for (var i = 0; i < laggedErrors.Length; i++) { var doubles = lags[i].ToList(); doubles.AddRange(laggedErrors[i]); appendedData.Add(doubles.ToArray()); } double[] maFits = default; if (HandleExceptions) { try { maFits = Fit.MultiDim(appendedData.ToArray(), data.Skip(_maOrder).ToArray(), method: DirectRegressionMethod.NormalEquations, intercept: _intercept); } catch (Exception ex) { if (!_loggedOnceInMovingAverageStep) { Logging.Log.Error($"AutoRegressiveIntegratedMovingAverage.MovingAverageStep(): {ex.Message}"); _loggedOnceInMovingAverageStep = true; } // The method Fit.MultiDim takes the appendedData array of mxn(m rows, n columns), computes its // transpose of size nxm, and then multiplies the tranpose with the original matrix, so the // resultant matrix is of size nxn. Then a linear system Ax=b is solved where A is the // aforementioned matrix and b is the data. Thus, the size of the response x is n // // It's worth saying that if intercept flag is set to true, the number of columns of the initial // matrix (appendedData) is increased in one. For more information, please see the implementation // of Fit.MultiDim() method (Ctrl + right click) var size = appendedData.ToArray()[0].Length + (_intercept ? 1 : 0); maFits = new double[size]; } } else { maFits = Fit.MultiDim(appendedData.ToArray(), data.Skip(_maOrder).ToArray(), method: DirectRegressionMethod.NormalEquations, intercept: _intercept); } for (var i = _maOrder; i < data.Length; i++) // Calculate the error assoc. with model. { var paramVector = _intercept ? Vector.Build.Dense(maFits.Skip(1).ToArray()) : Vector.Build.Dense(maFits); var residual = data[i] - Vector.Build.Dense(appendedData[i - _maOrder]).DotProduct(paramVector); errorMa += Math.Pow(residual, 2); } switch (_intercept) { case true: MaResidualError = errorMa / (data.Length - Math.Max(_arOrder, _maOrder) - 1); MaParameters = maFits.Skip(1 + _arOrder).ToArray(); ArParameters = maFits.Skip(1).Take(_arOrder).ToArray(); Intercept = maFits[0]; break; default: MaResidualError = errorMa / (data.Length - Math.Max(_arOrder, _maOrder) - 1); MaParameters = maFits.Skip(_arOrder).ToArray(); ArParameters = maFits.Take(_arOrder).ToArray(); break; } } /// /// Fits the autoregressive component in the method. /// /// An array of lagged data (). /// The input series, differenced times. /// The summed residuals (by default 0) associated with the AR component. private void AutoRegressiveStep(double[][] lags, double[] data, double errorAr) { double[] arFits; if (HandleExceptions) { try { // The function (lags[time][lagged X]) |---> ΣᵢφᵢXₜ₋ᵢ arFits = Fit.MultiDim(lags, data.Skip(_arOrder).ToArray(), method: DirectRegressionMethod.NormalEquations); } catch (Exception ex) { if (!_loggedOnceInAutoRegressiveStep) { Logging.Log.Error($"AutoRegressiveIntegratedMovingAverage.AutoRegressiveStep(): {ex.Message}"); _loggedOnceInAutoRegressiveStep = true; } // The method Fit.MultiDim takes the lags array of mxn(m rows, n columns), computes its // transpose of size nxm, and then multiplies the tranpose with the original matrix, so the // resultant matrix is of size nxn. Then a linear system Ax=b is solved where A is the // aforementioned matrix and b is the data. Thus, the size of the response x is n // // For more information, please see the implementation of Fit.MultiDim() method (Ctrl + right click) var size = lags.ToArray()[0].Length; arFits = new double[size]; } } else { // The function (lags[time][lagged X]) |---> ΣᵢφᵢXₜ₋ᵢ arFits = Fit.MultiDim(lags, data.Skip(_arOrder).ToArray(), method: DirectRegressionMethod.NormalEquations); } var fittedVec = Vector.Build.Dense(arFits); for (var i = 0; i < data.Length; i++) // Calculate the error assoc. with model. { if (i < _arOrder) { _residuals.Add(0); // 0-padding continue; } var residual = data[i] - Vector.Build.Dense(lags[i - _arOrder]).DotProduct(fittedVec); errorAr += Math.Pow(residual, 2); _residuals.Add(residual); } ArResidualError = errorAr / (data.Length - _arOrder - 1); if (_maOrder == 0) { ArParameters = arFits; // Will not be thrown out } } } }