/*
* QUANTCONNECT.COM - Democratizing Finance, Empowering Individuals.
* Lean Algorithmic Trading Engine v2.0. Copyright 2014 QuantConnect Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Python.Runtime;
using QuantConnect.Data;
using QuantConnect.Indicators;
using QuantConnect.Util;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
namespace QuantConnect.Python
{
/// <summary>
/// Collection of methods that convert lists of objects into a pandas.DataFrame
/// </summary>
public partial class PandasConverter
{
// The imported pandas module, assigned in the static constructor and used through dynamic dispatch
private static dynamic _pandas;
// The "concat" attribute of the pandas module, cached in the static constructor
private static PyObject _concat;
/// <summary>
/// Initializes the class: imports the pandas module and caches its <c>concat</c> attribute
/// </summary>
static PandasConverter()
{
    // The GIL is held for the whole initialization
    using var gil = Py.GIL();

    var pandasModule = Py.Import("pandas");
    _pandas = pandasModule;

    // keep it so we don't need to ask for it each time
    _concat = pandasModule.GetAttr("concat");
}
/// <summary>
/// Converts an enumerable of data into a pandas.DataFrame
/// </summary>
/// <param name="data">Enumerable with the data to convert</param>
/// <param name="flatten">Whether to flatten collections into rows and columns</param>
/// <param name="dataType">Optional type of bars to add to the data frame</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
public PyObject GetDataFrame(IEnumerable data, bool flatten = false, Type dataType = null)
{
    // All the work is delegated to the generator, built with the arguments as received
    return new DataFrameGenerator(data, flatten, dataType).GenerateDataFrame();
}
/// <summary>
/// Converts an enumerable of symbol-providing data into a pandas.DataFrame
/// </summary>
/// <param name="data">Enumerable with the data to convert</param>
/// <param name="symbolOnlyIndex">Whether to make the index only the symbol, without time or any other index levels</param>
/// <param name="forceMultiValueSymbol">
/// Useful when the data contains points for multiple symbols.
/// If false and <paramref name="symbolOnlyIndex"/> is true, it will assume there is a single point for each symbol,
/// and will apply performance improvements for the data frame generation.
/// </param>
/// <param name="flatten">Whether to flatten collections into rows and columns</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
/// <remarks>Helper method for testing</remarks>
public PyObject GetDataFrame(IEnumerable data, bool symbolOnlyIndex = false, bool forceMultiValueSymbol = false, bool flatten = false)
where T : ISymbolProvider
{
    var generator = new DataFrameGenerator(data, flatten);

    // Use 2 instead of maxLevels for backwards compatibility
    var indexLevels = symbolOnlyIndex ? 1 : 2;

    return generator.GenerateDataFrame(
        levels: indexLevels,
        sort: false,
        symbolOnlyIndex: symbolOnlyIndex,
        forceMultiValueSymbol: forceMultiValueSymbol);
}
/// <summary>
/// Converts a collection of named series into a pandas.DataFrame
/// </summary>
/// <param name="data">
/// Key/value pairs where the key is the series name and the value is the list of points of the series
/// </param>
/// <param name="extraData">Optional dynamic properties to include in the DataFrame.</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
public PyObject GetIndicatorDataFrame(IEnumerable>> data, IEnumerable>> extraData = null)
{
    using (Py.GIL())
    {
        using var seriesByName = new PyDict();

        // Regular series first, so the insertion order into the dictionary is data, then extraData
        foreach (var series in data)
        {
            AddSeriesToPyDict(series.Key, series.Value, seriesByName);
        }

        // The dynamic series are optional
        if (extraData != null)
        {
            foreach (var dynamicSeries in extraData)
            {
                AddDynamicSeriesToPyDict(dynamicSeries.Key, dynamicSeries.Value, seriesByName);
            }
        }

        return MakeIndicatorDataFrame(seriesByName);
    }
}
/// <summary>
/// Converts a Python dictionary of named series into a pandas.DataFrame
/// </summary>
/// <param name="data"><see cref="PyObject"/> that should be a dictionary (convertible to PyDict) of string to list of points</param>
/// <returns><see cref="PyObject"/> containing a pandas.DataFrame</returns>
/// <exception cref="ArgumentException">
/// Thrown when any step of the conversion fails. The original exception is kept as the inner exception.
/// </exception>
public PyObject GetIndicatorDataFrame(PyObject data)
{
using (Py.GIL())
{
// The Python type of the input is only used to build the error message
using var inputPythonType = data.GetPythonType();
var inputTypeStr = inputPythonType.ToString();
// Until the input has been wrapped as a PyDict, the target reported on failure is PyDict itself
var targetTypeStr = nameof(PyDict);
// Tracks the entry being processed so that a failure can report the types of that entry
PyObject currentKvp = null;
try
{
using var pyDictData = new PyDict(data);
using var seriesPyDict = new PyDict();
// From here on a failure is reported against the key/value target types instead of PyDict
targetTypeStr = $"{nameof(String)}: {nameof(List)}";
foreach (var kvp in pyDictData.Items())
{
currentKvp = kvp;
// kvp[0] is the key and kvp[1] the value of the dictionary item
AddSeriesToPyDict(kvp[0].As(), kvp[1].As>(), seriesPyDict);
}
return MakeIndicatorDataFrame(seriesPyDict);
}
catch (Exception e)
{
// If at least one entry was reached, report the Python types of the last entry's key and value
// instead of the type of the whole input
if (currentKvp != null)
{
inputTypeStr = $"{currentKvp[0].GetPythonType()}: {currentKvp[1].GetPythonType()}";
}
throw new ArgumentException(Messages.PandasConverter.ConvertToDictionaryFailed(inputTypeStr, targetTypeStr, e.Message), e);
}
}
}
/// <summary>
/// Returns a string that represents the current object
/// </summary>
/// <returns>
/// The result of calling <c>Repr</c> on the pandas module, or the "pandas module not imported" message
/// when the module reference is null
/// </returns>
public override string ToString()
{
    if (_pandas == null)
    {
        return Messages.PandasConverter.PandasModuleNotImported;
    }

    // The GIL is held while the module representation is requested
    using var gil = Py.GIL();
    return _pandas.Repr();
}
/// <summary>
/// Concatenates multiple data frames
/// </summary>
/// <param name="dataFrames">The data frames to concatenate</param>
/// <param name="keys">
/// Optional new keys for a new multi-index level that would be added
/// to index each individual data frame in the resulting one
/// </param>
/// <param name="names">The optional names of the new index level (and the existing ones if they need to be changed)</param>
/// <param name="sort">Whether to sort the resulting data frame</param>
/// <param name="dropna">Whether to drop columns containing NA values only (Nan, None, etc)</param>
/// <returns>A new data frame result from concatenating the input</returns>
public static PyObject ConcatDataFrames(IEnumerable dataFrames, IEnumerable keys, IEnumerable names,
    bool sort = true, bool dropna = true)
{
    using (Py.GIL())
    {
        using var pyDataFrames = dataFrames.ToPyListUnSafe();

        // Nothing to concatenate: return an empty data frame
        if (pyDataFrames.Length() == 0)
        {
            return _pandas.DataFrame();
        }

        using var kwargs = Py.kw("sort", sort);
        PyList pyKeys = null;
        PyList pyNames = null;

        try
        {
            // The new index level is only requested when both the keys and the names are provided
            if (keys != null && names != null)
            {
                pyNames = names.ToPyListUnSafe();
                pyKeys = ConvertConcatKeys(keys);
                using var pyFalse = false.ToPython();

                kwargs.SetItem("keys", pyKeys);
                kwargs.SetItem("names", pyNames);
                kwargs.SetItem("copy", pyFalse);
            }

            var result = _concat.Invoke(new[] { pyDataFrames }, kwargs);

            // Drop columns with only NaN or None values
            if (dropna)
            {
                using var dropnaKwargs = Py.kw("axis", 1, "inplace", true, "how", "all");

                // Both the bound method and the value returned by the call are Python object handles.
                // Dispose them here like the rest of the temporaries of this method.
                // The call works in place, so only 'result' is needed afterwards.
                using var dropnaMethod = result.GetAttr("dropna");
                using var dropnaResult = dropnaMethod.Invoke(Array.Empty(), dropnaKwargs);
            }

            return result;
        }
        finally
        {
            // These lists are only needed while the keyword arguments are in use
            pyKeys?.Dispose();
            pyNames?.Dispose();
        }
    }
}
/// <summary>
/// Concatenates multiple data frames, passing neither keys nor names for a new index level
/// </summary>
/// <param name="dataFrames">The data frames to concatenate</param>
/// <param name="sort">Whether to sort the resulting data frame</param>
/// <param name="dropna">Whether to drop columns containing NA values only (Nan, None, etc)</param>
/// <returns>A new data frame result from concatenating the input</returns>
public static PyObject ConcatDataFrames(IEnumerable dataFrames, bool sort = true, bool dropna = true)
    => ConcatDataFrames(dataFrames, null, null, sort, dropna);
/// <summary>
/// Creates the list of keys required for the pd.concat method, making sure that if the items are enumerables,
/// they are converted to Python tuples so that they are used as levels for a multi index
/// </summary>
/// <param name="keys">The keys to convert, each one being an enumerable of the values that make up the key</param>
/// <returns>A Python list with one tuple per key</returns>
private static PyList ConvertConcatKeys(IEnumerable> keys)
{
    // The converted items and tuples are materialized so that the objects disposed in the finally block
    // are the same ones that were created. Re-enumerating a deferred query would build (and dispose)
    // a new set of objects instead, and would enumerate 'keys' twice.
    var keyItems = new List<PyObject>();
    var keyTuples = new List<PyTuple>();
    try
    {
        foreach (var key in keys)
        {
            var items = key.Select(y => y.ToPython()).ToArray();
            keyItems.AddRange(items);
            keyTuples.Add(new PyTuple(items));
        }

        // NOTE(review): as in the previous implementation, this assumes the returned list keeps
        // its own references to the tuples, so that they can be disposed here - confirm
        return keyTuples.ToPyListUnSafe();
    }
    finally
    {
        foreach (var tuple in keyTuples)
        {
            tuple.DisposeSafely();
        }
        foreach (var item in keyItems)
        {
            item.DisposeSafely();
        }
    }
}
/// <summary>
/// Creates the list of keys required for the pd.concat method. Keys whose type is an enumerable other
/// than a string are cast and converted through the overload that takes enumerable keys.
/// Any other key type is converted to a Python list directly.
/// </summary>
/// <param name="keys">The keys to convert</param>
/// <returns>A Python list with the converted keys</returns>
private static PyList ConvertConcatKeys(IEnumerable keys)
{
    var keyType = typeof(T);

    // Strings are enumerables of characters, but they must be treated as single values
    var isEnumerableKey = keyType.IsAssignableTo(typeof(IEnumerable)) && !keyType.IsAssignableTo(typeof(string));
    if (!isEnumerableKey)
    {
        return keys.ToPyListUnSafe();
    }

    return ConvertConcatKeys(keys.Cast>());
}
/// <summary>
/// Creates a pandas series from a list of points and adds it to the <see cref="PyDict"/>
/// as the value of the given <paramref name="key"/>
/// </summary>
/// <param name="key">Key to insert in the <see cref="PyDict"/>. It is lower-cased (invariant culture) before being inserted</param>
/// <param name="points">List of points that will make up the resulting series</param>
/// <param name="pyDict"><see cref="PyDict"/> where the resulting key-value pair will be inserted into</param>
private void AddSeriesToPyDict(string key, List points, PyDict pyDict)
{
    var index = new List();
    var values = new List();

    foreach (var dataPoint in points)
    {
        // Points with a default (unset) end time are left out of the series
        if (dataPoint.EndTime == default)
        {
            continue;
        }

        index.Add(dataPoint.EndTime);
        values.Add((double)dataPoint.Value);
    }

    var seriesKey = key.ToLowerInvariant();
    pyDict.SetItem(seriesKey, _pandas.Series(values, index));
}
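/// <summary>
/// Builds a time-indexed pandas series from a collection of
/// heterogeneous data (numbers, enums, strings, etc.) and inserts it into the
/// specified <see cref="PyDict"/> under the given <paramref name="key"/>.
/// </summary>
/// <param name="key">Key to insert in the <see cref="PyDict"/></param>
/// <param name="entries">A list of tuples whose first item is the timestamp and whose second item is the value associated with that timestamp.</param>
/// <param name="pyDict"><see cref="PyDict"/> where the resulting key-value pair will be inserted into</param>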
private void AddDynamicSeriesToPyDict(string key, List<(DateTime Timestamp, object Value)> entries, PyDict pyDict)
{
var index = new List();
var values = new List