/*
 * QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
 * Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

using Python.Runtime;
using QuantConnect.Data;
using QuantConnect.Indicators;
using QuantConnect.Util;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;

namespace QuantConnect.Python
{
    /// <summary>
    /// Collection of methods that converts lists of objects in pandas.DataFrame
    /// </summary>
    public partial class PandasConverter
    {
        private static dynamic _pandas;
        private static PyObject _concat;

        /// <summary>
        /// Initializes the class: imports the pandas module while holding the GIL and
        /// caches both the module and its <c>concat</c> attribute for later calls.
        /// </summary>
        static PandasConverter()
        {
            using (Py.GIL())
            {
                var pandas = Py.Import("pandas");
                _pandas = pandas;
                // keep it so we don't need to ask for it each time
                _concat = pandas.GetAttr("concat");
            }
        }

        /// <summary>
        /// Converts an enumerable of <see cref="Slice"/> in a pandas.DataFrame
        /// </summary>
        /// <param name="data">Enumerable of <see cref="Slice"/></param>
        /// <param name="flatten">Whether to flatten collections into rows and columns</param>
        /// <param name="dataType">Optional type of bars to add to the data frame</param>
        /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
        public PyObject GetDataFrame(IEnumerable<Slice> data, bool flatten = false, Type dataType = null)
        {
            var generator = new DataFrameGenerator(data, flatten, dataType);
            return generator.GenerateDataFrame();
        }

        /// <summary>
        /// Converts an enumerable of <see cref="ISymbolProvider"/> in a pandas.DataFrame
        /// </summary>
        /// <param name="data">Enumerable of <see cref="ISymbolProvider"/></param>
        /// <param name="symbolOnlyIndex">Whether to make the index only the symbol, without time or any other index levels</param>
        /// <param name="forceMultiValueSymbol">
        /// Useful when the data contains points for multiple symbols.
        /// If false and <paramref name="symbolOnlyIndex"/> is true, it will assume there is a single point for each symbol,
        /// and will apply performance improvements for the data frame generation.
        /// </param>
        /// <param name="flatten">Whether to flatten collections into rows and columns</param>
        /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
        /// <remarks>Helper method for testing</remarks>
        public PyObject GetDataFrame<T>(IEnumerable<T> data, bool symbolOnlyIndex = false, bool forceMultiValueSymbol = false, bool flatten = false)
            where T : ISymbolProvider
        {
            var generator = new DataFrameGenerator<T>(data, flatten);
            return generator.GenerateDataFrame(
                // Use 2 instead of maxLevels for backwards compatibility
                levels: symbolOnlyIndex ? 1 : 2,
                sort: false,
                symbolOnlyIndex: symbolOnlyIndex,
                forceMultiValueSymbol: forceMultiValueSymbol);
        }

        /// <summary>
        /// Converts a dictionary with a list of <see cref="IndicatorDataPoint"/> in a pandas.DataFrame
        /// </summary>
        /// <param name="data">Dictionary with a list of <see cref="IndicatorDataPoint"/></param>
        /// <param name="extraData">Optional dynamic properties to include in the DataFrame.</param>
        /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
        public PyObject GetIndicatorDataFrame(IEnumerable<KeyValuePair<string, List<IndicatorDataPoint>>> data,
            IEnumerable<KeyValuePair<string, List<(DateTime, object)>>> extraData = null)
        {
            using (Py.GIL())
            {
                using var pyDict = new PyDict();

                foreach (var kvp in data)
                {
                    AddSeriesToPyDict(kvp.Key, kvp.Value, pyDict);
                }

                if (extraData != null)
                {
                    foreach (var kvp in extraData)
                    {
                        AddDynamicSeriesToPyDict(kvp.Key, kvp.Value, pyDict);
                    }
                }

                return MakeIndicatorDataFrame(pyDict);
            }
        }

        /// <summary>
        /// Converts a dictionary with a list of <see cref="IndicatorDataPoint"/> in a pandas.DataFrame
        /// </summary>
        /// <param name="data"><see cref="PyObject"/> that should be a dictionary (convertible to PyDict) of string to list of <see cref="IndicatorDataPoint"/></param>
        /// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="data"/> or one of its entries cannot be converted; the original
        /// exception is attached as the inner exception.
        /// </exception>
        public PyObject GetIndicatorDataFrame(PyObject data)
        {
            using (Py.GIL())
            {
                using var inputPythonType = data.GetPythonType();
                var inputTypeStr = inputPythonType.ToString();
                var targetTypeStr = nameof(PyDict);
                // Tracks the entry being converted so a failure can report that entry's types
                PyObject currentKvp = null;

                try
                {
                    using var pyDictData = new PyDict(data);
                    using var seriesPyDict = new PyDict();

                    // From here on a failure is about an individual entry, not the dictionary itself
                    targetTypeStr = $"{nameof(String)}: {nameof(List<IndicatorDataPoint>)}";

                    foreach (var kvp in pyDictData.Items())
                    {
                        currentKvp = kvp;
                        AddSeriesToPyDict(kvp[0].As<string>(), kvp[1].As<List<IndicatorDataPoint>>(), seriesPyDict);
                    }

                    return MakeIndicatorDataFrame(seriesPyDict);
                }
                catch (Exception e)
                {
                    if (currentKvp != null)
                    {
                        // Release the type handles once their names have been captured
                        using var keyPythonType = currentKvp[0].GetPythonType();
                        using var valuePythonType = currentKvp[1].GetPythonType();
                        inputTypeStr = $"{keyPythonType}: {valuePythonType}";
                    }

                    throw new ArgumentException(Messages.PandasConverter.ConvertToDictionaryFailed(inputTypeStr, targetTypeStr, e.Message), e);
                }
            }
        }

        /// <summary>
        /// Returns a string that represent the current object
        /// </summary>
        /// <returns>
        /// The result of calling <c>Repr()</c> on the cached pandas module, or
        /// <c>Messages.PandasConverter.PandasModuleNotImported</c> when the module reference is null.
        /// </returns>
        public override string ToString()
        {
            if (_pandas == null)
            {
                return Messages.PandasConverter.PandasModuleNotImported;
            }

            using (Py.GIL())
            {
                return _pandas.Repr();
            }
        }

        /// <summary>
        /// Concatenates multiple data frames
        /// </summary>
        /// <param name="dataFrames">The data frames to concatenate</param>
        /// <param name="keys">
        /// Optional new keys for a new multi-index level that would be added
        /// to index each individual data frame in the resulting one
        /// </param>
        /// <param name="names">The optional names of the new index level (and the existing ones if they need to be changed)</param>
        /// <param name="sort">Whether to sort the resulting data frame</param>
        /// <param name="dropna">Whether to drop columns containing NA values only (Nan, None, etc)</param>
        /// <returns>A new data frame result from concatenating the input</returns>
        /// <remarks>
        /// <paramref name="keys"/> and <paramref name="names"/> are only forwarded to pandas when both are
        /// non-null; in that case <c>copy=False</c> is passed as well.
        /// </remarks>
        public static PyObject ConcatDataFrames<T>(IEnumerable<PyObject> dataFrames, IEnumerable<T> keys, IEnumerable<string> names,
            bool sort = true, bool dropna = true)
        {
            using (Py.GIL())
            {
                using var pyDataFrames = dataFrames.ToPyListUnSafe();

                // Nothing to concatenate: hand back an empty data frame
                if (pyDataFrames.Length() == 0)
                {
                    return _pandas.DataFrame();
                }

                using var kwargs = Py.kw("sort", sort);
                PyList pyKeys = null;
                PyList pyNames = null;

                try
                {
                    if (keys != null && names != null)
                    {
                        pyNames = names.ToPyListUnSafe();
                        pyKeys = ConvertConcatKeys(keys);
                        using var pyFalse = false.ToPython();

                        kwargs.SetItem("keys", pyKeys);
                        kwargs.SetItem("names", pyNames);
                        kwargs.SetItem("copy", pyFalse);
                    }

                    var result = _concat.Invoke(new[] { pyDataFrames }, kwargs);

                    // Drop columns with only NaN or None values
                    if (dropna)
                    {
                        using var dropnaKwargs = Py.kw("axis", 1, "inplace", true, "how", "all");
                        // Both the bound method and the value it returns are Python handles we own,
                        // so release them as soon as the in-place call has completed
                        using var dropnaMethod = result.GetAttr("dropna");
                        using var dropnaResult = dropnaMethod.Invoke(Array.Empty<PyObject>(), dropnaKwargs);
                    }

                    return result;
                }
                finally
                {
                    pyKeys?.Dispose();
                    pyNames?.Dispose();
                }
            }
        }

        /// <summary>
        /// Concatenates multiple data frames without adding a new index level
        /// </summary>
        /// <param name="dataFrames">The data frames to concatenate</param>
        /// <param name="sort">Whether to sort the resulting data frame</param>
        /// <param name="dropna">Whether to drop columns containing NA values only (Nan, None, etc)</param>
        /// <returns>A new data frame result from concatenating the input</returns>
        public static PyObject ConcatDataFrames(IEnumerable<PyObject> dataFrames, bool sort = true, bool dropna = true)
        {
            // The type argument is irrelevant here because no keys are supplied
            return ConcatDataFrames<string>(dataFrames, null, null, sort, dropna);
        }

        /// <summary>
        /// Creates the list of keys required for the pd.concat method, making sure that if the items are enumerables,
        /// they are converted to Python tuples so that they are used as levels for a multi index
        /// </summary>
        /// <param name="keys">The keys, each one an enumerable of the values that make up a single multi-index key</param>
        /// <returns>A <see cref="PyList"/> with one Python tuple per key</returns>
        private static PyList ConvertConcatKeys(IEnumerable<IEnumerable<object>> keys)
        {
            // The tuples are collected eagerly into a list. A lazy query would be evaluated a second time
            // by the clean-up loop below, which would then create and dispose brand new tuples while the
            // ones actually added to the Python list were never released.
            var keyTuples = new List<PyTuple>();
            try
            {
                foreach (var key in keys)
                {
                    keyTuples.Add(new PyTuple(key.Select(y => y.ToPython()).ToArray()));
                }

                return keyTuples.ToPyListUnSafe();
            }
            finally
            {
                foreach (var tuple in keyTuples)
                {
                    foreach (var x in tuple)
                    {
                        x.DisposeSafely();
                    }
                    tuple.DisposeSafely();
                }
            }
        }

        /// <summary>
        /// Creates the list of keys required for the pd.concat method. Keys whose type is an enumerable
        /// (other than string) are converted to Python tuples; any other key is added to the list as is.
        /// </summary>
        /// <param name="keys">The keys to convert</param>
        /// <returns>A <see cref="PyList"/> with the converted keys</returns>
        private static PyList ConvertConcatKeys<T>(IEnumerable<T> keys)
        {
            // string is itself enumerable, but it must stay a single scalar key
            if ((typeof(T).IsAssignableTo(typeof(IEnumerable)) && !typeof(T).IsAssignableTo(typeof(string))))
            {
                return ConvertConcatKeys(keys.Cast<IEnumerable<object>>());
            }

            return keys.ToPyListUnSafe();
        }

        /// <summary>
        /// Creates a series from a list of <see cref="IndicatorDataPoint"/> and adds it to the
        /// <see cref="PyDict"/> as the value of the given <paramref name="key"/>
        /// </summary>
        /// <param name="key">Key to insert in the <see cref="PyDict"/>; it is lower-cased before insertion</param>
        /// <param name="points">List of <see cref="IndicatorDataPoint"/> that will make up the resulting series</param>
        /// <param name="pyDict"><see cref="PyDict"/> where the resulting key-value pair will be inserted into</param>
        private void AddSeriesToPyDict(string key, List<IndicatorDataPoint> points, PyDict pyDict)
        {
            var index = new List<DateTime>();
            var values = new List<double>();

            foreach (var point in points)
            {
                // Points whose end time was never set are left out of the series
                if (point.EndTime != default)
                {
                    index.Add(point.EndTime);
                    values.Add((double)point.Value);
                }
            }
            pyDict.SetItem(key.ToLowerInvariant(), _pandas.Series(values, index));
        }

        /// <summary>
        /// Builds a time-indexed pandas Series from a collection of
        /// heterogeneous data (numbers, enums, strings, etc.) and inserts it into the
        /// specified <see cref="PyDict"/> under the given <paramref name="key"/>.
        /// </summary>
        /// <param name="key">Key to insert in the <see cref="PyDict"/>; it is lower-cased before insertion</param>
        /// <param name="entries">A list of tuples whose first item is the timestamp and whose second item is the value associated with that timestamp.</param>
        /// <param name="pyDict"><see cref="PyDict"/> where the resulting key-value pair will be inserted into</param>
        private void AddDynamicSeriesToPyDict(string key, List<(DateTime Timestamp, object Value)> entries, PyDict pyDict)
        {
            var index = new List<DateTime>();
            var values = new List<object>();

            foreach (var (timestamp, value) in entries)
            {
                // Entries whose timestamp was never set are left out of the series
                if (timestamp != default)
                {
                    index.Add(timestamp);
                    // Enum values are stored by name rather than as the enum instance
                    values.Add(value is Enum e ? e.ToString() : value);
                }
            }
            pyDict.SetItem(key.ToLowerInvariant(), _pandas.Series(values, index));
        }

        /// <summary>
        /// Converts a <see cref="PyDict"/> of string to pandas.Series in a pandas.DataFrame
        /// </summary>
        /// <param name="pyDict"><see cref="PyDict"/> of string to pandas.Series</param>
        /// <returns><see cref="PyObject"/> containing a pandas.DataFrame whose columns are the lower-cased keys in ascending order</returns>
        private PyObject MakeIndicatorDataFrame(PyDict pyDict)
        {
            return _pandas.DataFrame(pyDict, columns: pyDict.Keys().Select(x => x.As<string>().ToLowerInvariant()).OrderBy(x => x));
        }
    }
}