# brighter-trading/tests/test_backtest_determinism.py
#
# 367 lines
# 12 KiB
# Python

"""
Tests for backtest determinism.
These tests ensure that running the same strategy with the same data
produces identical results every time.
"""
import pytest
from backtest_result import (
BacktestResult, BacktestMetrics, TradeResult,
create_backtest_result
)
class TestBacktestResult:
    """Schema checks for ``BacktestResult``: factory output, dict/JSON export, dict import."""

    @staticmethod
    def _minimal_result(stats):
        """Build a trade-less, two-point result through the factory with the given *stats*.

        Not collected by pytest (the name does not start with ``test``); it only
        removes the duplicated factory call from the serialization tests.
        """
        return create_backtest_result(
            strategy_id='test-1',
            strategy_name='Test',
            user_id=1,
            backtest_id='bt-001',
            initial_capital=10000,
            final_value=10500,
            equity_curve=[10000, 10500],
            trades=[],
            stats=stats,
            run_duration=0.5,
        )

    def test_create_backtest_result(self):
        """The factory output exposes the capital, curve, trades and metrics it was given."""
        closed_trades = [
            {'ref': 1, 'pnl': 200, 'side': 'buy'},
            {'ref': 2, 'pnl': 300, 'side': 'buy'},
        ]
        summary_stats = {
            'total_return': 15.0,
            'sharpe_ratio': 1.2,
            'max_drawdown': -5.0,
            'win_rate': 100.0,
            'number_of_trades': 2,
        }

        outcome = create_backtest_result(
            strategy_id='test-strategy-1',
            strategy_name='Test Strategy',
            user_id=1,
            backtest_id='bt-001',
            initial_capital=10000.0,
            final_value=11500.0,
            equity_curve=[10000, 10200, 10100, 10500, 11000, 11500],
            trades=closed_trades,
            stats=summary_stats,
            run_duration=1.5,
        )

        # Top-level fields.
        assert outcome.success
        assert outcome.initial_capital == 10000.0
        assert outcome.final_portfolio_value == 11500.0
        # Collections keep one entry per supplied element.
        assert len(outcome.equity_curve) == 6
        assert len(outcome.trades) == 2
        # Stats are reachable through the nested metrics object.
        assert outcome.metrics.total_return == 15.0
        assert outcome.metrics.win_rate == 100.0

    def test_backtest_result_to_dict(self):
        """``to_dict`` returns a plain dict whose ``metrics`` entry is itself a dict."""
        exported = self._minimal_result({'total_return': 5.0}).to_dict()

        assert isinstance(exported, dict)
        assert exported['strategy_id'] == 'test-1'
        assert exported['initial_capital'] == 10000
        assert isinstance(exported['metrics'], dict)

    def test_backtest_result_to_json(self):
        """``to_json`` returns a string that contains the strategy id."""
        serialized = self._minimal_result({}).to_json()

        assert isinstance(serialized, str)
        assert 'test-1' in serialized

    def test_backtest_result_from_dict(self):
        """``BacktestResult.from_dict`` rebuilds a result, including nested metrics."""
        metrics_payload = {
            'total_return': 5.0,
            'number_of_trades': 0,
        }
        payload = {
            'strategy_id': 'test-1',
            'strategy_name': 'Test',
            'user_id': 1,
            'backtest_id': 'bt-001',
            'start_date': '2024-01-01T00:00:00',
            'end_date': '2024-01-31T00:00:00',
            'run_datetime': '2024-02-01T12:00:00',
            'run_duration_seconds': 1.5,
            'initial_capital': 10000,
            'final_portfolio_value': 10500,
            'commission_rate': 0.001,
            'success': True,
            'equity_curve': [10000, 10250, 10500],
            'trades': [],
            'metrics': metrics_payload,
        }

        restored = BacktestResult.from_dict(payload)

        assert restored.strategy_id == 'test-1'
        assert restored.initial_capital == 10000
        assert restored.metrics.total_return == 5.0
class TestBacktestDeterminism:
    """Checks on ``get_determinism_hash`` and ``verify_determinism``."""

    def test_same_inputs_same_hash(self):
        """Runs that differ only in ``backtest_id`` and ``run_duration`` must hash alike."""
        # Fully populated trade records, shared by both runs.
        round_trips = [
            {
                'ref': 1, 'pnl': 500, 'pnlcomm': 499, 'size': 0.1,
                'open_price': 50000, 'close_price': 55000, 'side': 'buy',
            },
            {
                'ref': 2, 'pnl': 500, 'pnlcomm': 499, 'size': 0.1,
                'open_price': 55000, 'close_price': 60000, 'side': 'sell',
            },
        ]
        summary = {
            'total_return': 10.0,
            'number_of_trades': 2,
            'win_rate': 100.0,
            'sharpe_ratio': 1.5,
            'max_drawdown': -2.0,
            'profit_factor': 2.0,
        }

        def run(backtest_id, run_duration):
            # Only the run bookkeeping varies; trades and stats are the same objects.
            return create_backtest_result(
                strategy_id='strategy-abc',
                strategy_name='Test Strategy',
                user_id=1,
                backtest_id=backtest_id,
                initial_capital=10000.0,
                final_value=11000.0,
                equity_curve=[10000, 10500, 11000],
                trades=round_trips,
                stats=summary,
                run_duration=run_duration,
            )

        first = run('bt-001', 1.0)
        second = run('bt-002', 2.0)

        # The id and the runtime are expected to be left out of the hash.
        assert first.get_determinism_hash() == second.get_determinism_hash()

    def test_different_results_different_hash(self):
        """Results with different value, curve, trades and stats must not share a hash."""

        def run(final_value, equity_curve, trades, stats):
            # Identity fields stay fixed so only the outcome data differs.
            return create_backtest_result(
                strategy_id='strategy-abc',
                strategy_name='Test',
                user_id=1,
                backtest_id='bt-001',
                initial_capital=10000.0,
                final_value=final_value,
                equity_curve=equity_curve,
                trades=trades,
                stats=stats,
                run_duration=1.0,
            )

        larger_gain = run(
            final_value=11000.0,
            equity_curve=[10000, 10500, 11000],
            trades=[{'pnl': 1000, 'size': 0.1, 'open_price': 50000, 'close_price': 60000}],
            stats={'total_return': 10.0, 'win_rate': 100.0, 'number_of_trades': 1},
        )
        smaller_gain = run(
            final_value=10500.0,
            equity_curve=[10000, 10250, 10500],
            trades=[{'pnl': 500, 'size': 0.1, 'open_price': 50000, 'close_price': 55000}],
            stats={'total_return': 5.0, 'win_rate': 100.0, 'number_of_trades': 1},
        )

        assert larger_gain.get_determinism_hash() != smaller_gain.get_determinism_hash()

    def test_different_trade_sequence_different_hash(self):
        """Equal totals reached through different trades must still hash differently."""

        def run(equity_curve, trades):
            # Final value and aggregate stats are the same for both paths.
            return create_backtest_result(
                strategy_id='strategy-abc',
                strategy_name='Test',
                user_id=1,
                backtest_id='bt-001',
                initial_capital=10000.0,
                final_value=11000.0,
                equity_curve=equity_curve,
                trades=trades,
                stats={'total_return': 10.0, 'win_rate': 100.0, 'number_of_trades': 2},
                run_duration=1.0,
            )

        # +500 then +500 ...
        even_path = run(
            equity_curve=[10000, 10500, 11000],
            trades=[
                {'pnl': 500, 'size': 0.1, 'open_price': 50000, 'close_price': 55000},
                {'pnl': 500, 'size': 0.1, 'open_price': 55000, 'close_price': 60000},
            ],
        )
        # ... versus +200 then +800: same total PnL, different route.
        uneven_path = run(
            equity_curve=[10000, 10200, 11000],
            trades=[
                {'pnl': 200, 'size': 0.1, 'open_price': 50000, 'close_price': 52000},
                {'pnl': 800, 'size': 0.1, 'open_price': 52000, 'close_price': 60000},
            ],
        )

        assert even_path.get_determinism_hash() != uneven_path.get_determinism_hash()

    def test_verify_determinism(self):
        """``verify_determinism`` is truthy for two runs with identical result data."""
        flat_stats = {
            'total_return': 5.0,
            'number_of_trades': 0,
            'win_rate': 0.0,
            'sharpe_ratio': 0.0,
            'max_drawdown': 0.0,
            'profit_factor': 0.0,
        }

        def run(backtest_id, run_duration):
            return create_backtest_result(
                strategy_id='strategy-1',
                strategy_name='Test',
                user_id=1,
                backtest_id=backtest_id,
                initial_capital=10000,
                final_value=10500,
                equity_curve=[10000, 10250, 10500],
                trades=[],
                stats=flat_stats,
                run_duration=run_duration,
            )

        reference = run('bt-001', 1.0)
        repeat = run('bt-002', 2.0)

        assert reference.verify_determinism(repeat)

    def test_floating_point_precision(self):
        """Results carrying fractional values hash identically across two runs.

        NOTE(review): both runs receive exactly the same numbers, so this shows
        the hash is stable for float inputs. It does not feed values that differ
        only beyond the rounding precision; the hash is presumed to round its
        inputs -- confirm against ``get_determinism_hash``.
        """
        fractional_trades = [
            {
                'pnl': 500.123456,
                'size': 0.123456,
                'open_price': 50000.123,
                'close_price': 55000.456,
            },
        ]
        fractional_stats = {
            'total_return': 5.001234,
            'number_of_trades': 1,
            'win_rate': 100.0,
            'sharpe_ratio': 1.234,
            'max_drawdown': -0.5,
            'profit_factor': 2.5,
        }

        def run(backtest_id):
            return create_backtest_result(
                strategy_id='strategy-1',
                strategy_name='Test',
                user_id=1,
                backtest_id=backtest_id,
                initial_capital=10000.0,
                final_value=10500.123456,
                equity_curve=[10000.0, 10500.123456],
                trades=fractional_trades,
                stats=fractional_stats,
                run_duration=1.0,
            )

        first = run('bt-001')
        second = run('bt-002')

        assert first.get_determinism_hash() == second.get_determinism_hash()
class TestBacktestMetrics:
    """Checks for ``BacktestMetrics`` defaults and dict export."""

    def test_metrics_defaults(self):
        """A metrics object built without arguments reports zero for the checked fields."""
        blank = BacktestMetrics()
        expected_defaults = {
            'total_return': 0.0,
            'number_of_trades': 0,
            'win_rate': 0.0,
        }

        for field_name, default in expected_defaults.items():
            assert getattr(blank, field_name) == default

    def test_metrics_to_dict(self):
        """``to_dict`` carries constructor values through under the same keys."""
        exported = BacktestMetrics(
            total_return=15.5,
            sharpe_ratio=1.2,
            number_of_trades=10,
            win_rate=60.0,
        ).to_dict()

        assert exported['total_return'] == 15.5
        assert exported['number_of_trades'] == 10
class TestTradeResult:
    """Checks for ``TradeResult`` construction and dict export."""

    def test_trade_result_creation(self):
        """Constructor arguments are readable back as attributes."""
        attributes = dict(
            ref=1,
            symbol='BTC/USDT',
            side='buy',
            open_datetime='2024-01-01T10:00:00',
            close_datetime='2024-01-01T12:00:00',
            size=0.1,
            open_price=50000,
            close_price=51000,
            pnl=100,
            pnlcomm=99,
            commission=1,
        )

        closed_trade = TradeResult(**attributes)

        assert closed_trade.ref == 1
        assert closed_trade.symbol == 'BTC/USDT'
        assert closed_trade.pnl == 100

    def test_trade_result_to_dict(self):
        """``to_dict`` returns a dict and keeps a ``None`` close timestamp as ``None``."""
        # Close fields are None and ``commission`` is left to its default.
        attributes = dict(
            ref=1,
            symbol='BTC/USDT',
            side='buy',
            open_datetime='2024-01-01T10:00:00',
            close_datetime=None,
            size=0.1,
            open_price=50000,
            close_price=None,
            pnl=0,
            pnlcomm=0,
        )

        exported = TradeResult(**attributes).to_dict()

        assert isinstance(exported, dict)
        assert exported['ref'] == 1
        assert exported['close_datetime'] is None