Phase 3: Backtest determinism

- Fix deprecated datetime.utcfromtimestamp() in shared_utilities.py
- Create BacktestResult schema with locked structure
- Add TradeResult and BacktestMetrics dataclasses
- Implement determinism hash for verifying reproducible results
- Add comprehensive tests for result schema and determinism

The BacktestResult schema ensures a consistent output format and
provides methods to verify that the same inputs produce the same results.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
rob 2026-02-28 17:02:54 -04:00
parent f1182d4e0c
commit 1bb224b15d
3 changed files with 555 additions and 1 deletions

229
src/backtest_result.py Normal file
View File

@ -0,0 +1,229 @@
"""
Backtest Result Schema for BrighterTrading.
Defines the standardized structure for backtest results to ensure
consistency and determinism across runs.
"""
from dataclasses import dataclass, field, asdict
from typing import List, Dict, Any, Optional
from datetime import datetime
import json
@dataclass
class TradeResult:
    """
    Record describing one trade.

    The declaration order of the fields below is also the positional
    argument order of the generated constructor; only ``commission``
    has a default.
    """
    # Reference number identifying the trade.
    ref: int
    symbol: str
    # Either 'buy' or 'sell'.
    side: str
    # Opening timestamp as an ISO-format string.
    open_datetime: str
    # Closing timestamp as an ISO-format string; None while the trade is open.
    close_datetime: Optional[str]
    size: float
    open_price: float
    close_price: Optional[float]
    # Profit/loss of the trade.
    pnl: float
    # Profit/loss after commission.
    pnlcomm: float
    commission: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return every field of this trade as a plain dictionary."""
        as_mapping: Dict[str, Any] = asdict(self)
        return as_mapping
@dataclass
class BacktestMetrics:
    """
    Container for the performance figures of a backtest.

    Every field has a zero default, so the class can be instantiated
    without arguments.
    """
    # Return and risk-adjusted ratios.
    total_return: float = 0.0
    sharpe_ratio: float = 0.0
    sortino_ratio: float = 0.0
    calmar_ratio: float = 0.0
    volatility: float = 0.0
    max_drawdown: float = 0.0
    profit_factor: float = 0.0
    average_pnl: float = 0.0
    # Trade counts and win/loss figures.
    number_of_trades: int = 0
    win_loss_ratio: float = 0.0
    max_consecutive_wins: int = 0
    max_consecutive_losses: int = 0
    win_rate: float = 0.0
    loss_rate: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return all metric fields as a plain dictionary."""
        as_mapping: Dict[str, Any] = asdict(self)
        return as_mapping
@dataclass
class BacktestResult:
    """
    Standardized backtest result schema.

    This schema ensures consistent results across runs for the same
    strategy and data, enabling determinism verification.
    """
    # Identification
    strategy_id: str
    strategy_name: str
    user_id: int
    backtest_id: str
    # Timing
    start_date: str  # ISO format
    end_date: str  # ISO format
    run_datetime: str  # When the backtest was run (ISO format)
    run_duration_seconds: float
    # Capital
    initial_capital: float
    final_portfolio_value: float
    commission_rate: float
    # Results
    success: bool
    error_message: Optional[str] = None
    # Data
    equity_curve: List[float] = field(default_factory=list)
    trades: List[Dict[str, Any]] = field(default_factory=list)
    metrics: BacktestMetrics = field(default_factory=BacktestMetrics)
    # Metadata for reproducibility
    data_source: Optional[str] = None
    symbol: Optional[str] = None
    timeframe: Optional[str] = None
    data_points: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary for serialization.

        :return: Dictionary of all fields, with ``metrics`` as a dict.
        """
        result = asdict(self)
        # asdict() already expands dataclass metrics; this additionally
        # covers a metrics object that only exposes to_dict().
        if hasattr(self.metrics, 'to_dict'):
            result['metrics'] = self.metrics.to_dict()
        return result

    def to_json(self, indent: int = 2) -> str:
        """
        Convert to JSON string.

        :param indent: Indentation width passed to ``json.dumps``.
        :return: JSON text; values json cannot encode are stringified.
        """
        return json.dumps(self.to_dict(), indent=indent, default=str)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'BacktestResult':
        """
        Create from dictionary.

        :param data: Mapping of field names to values. A ``metrics`` entry
            given as a dict is converted to ``BacktestMetrics``.
        :return: New BacktestResult instance.
        """
        # Work on a shallow copy so the caller's dict is left untouched.
        payload = dict(data)
        metrics = payload.get('metrics')
        if isinstance(metrics, dict):
            payload['metrics'] = BacktestMetrics(**metrics)
        return cls(**payload)

    def get_determinism_hash(self) -> str:
        """
        Generate a hash of the deterministic parts of the result.

        Only strategy_id, initial_capital, final_portfolio_value, the
        equity curve, the trade count, the summed trade PnL and three
        metrics (total_return, number_of_trades, win_rate) are hashed.
        Fields such as run_datetime, run_duration_seconds and backtest_id
        are excluded. Float values are rounded to 6 decimals.

        :return: SHA-256 hex digest of the selected fields.
        """
        import hashlib

        def _pnl(trade: Dict[str, Any]) -> Any:
            # A missing or None pnl contributes nothing to the total.
            value = trade.get('pnl', 0)
            return 0 if value is None else value

        # Include only deterministic fields
        deterministic_data = {
            'strategy_id': self.strategy_id,
            'initial_capital': self.initial_capital,
            'final_portfolio_value': round(self.final_portfolio_value, 6),
            'equity_curve': [round(e, 6) for e in self.equity_curve],
            'trades_count': len(self.trades),
            # Rounded like every other float here so summation noise
            # beyond 6 decimals cannot change the hash.
            'trades_pnl': round(sum(_pnl(t) for t in self.trades), 6),
            'metrics': {
                'total_return': round(self.metrics.total_return, 6),
                'number_of_trades': self.metrics.number_of_trades,
                'win_rate': round(self.metrics.win_rate, 6),
            }
        }
        # sort_keys gives a stable serialization regardless of dict order.
        json_str = json.dumps(deterministic_data, sort_keys=True)
        return hashlib.sha256(json_str.encode()).hexdigest()

    def verify_determinism(self, other: 'BacktestResult') -> bool:
        """
        Verify that another backtest result is deterministically equivalent.

        :param other: Another backtest result to compare.
        :return: True if both results have the same determinism hash.
        """
        return self.get_determinism_hash() == other.get_determinism_hash()
def create_backtest_result(
    strategy_id: str,
    strategy_name: str,
    user_id: int,
    backtest_id: str,
    initial_capital: float,
    final_value: float,
    equity_curve: List[float],
    trades: List[Dict[str, Any]],
    stats: Dict[str, Any],
    run_duration: float,
    success: bool = True,
    error_message: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    commission: float = 0.001,
    **kwargs
) -> BacktestResult:
    """
    Factory function to create a BacktestResult from raw backtest output.

    :param strategy_id: Strategy identifier.
    :param strategy_name: Strategy name.
    :param user_id: User identifier.
    :param backtest_id: Unique backtest identifier.
    :param initial_capital: Starting capital.
    :param final_value: Final portfolio value.
    :param equity_curve: List of portfolio values over time (None is
        treated as empty).
    :param trades: List of trade dictionaries (None is treated as empty).
    :param stats: Dictionary of performance metrics (None is treated as
        empty; a missing or None entry falls back to its default).
    :param run_duration: Backtest runtime in seconds.
    :param success: Whether backtest succeeded.
    :param error_message: Error message if failed.
    :param start_date: Backtest start date (ISO format). Defaults to the
        creation timestamp when omitted or empty.
    :param end_date: Backtest end date (ISO format). Defaults to the
        creation timestamp when omitted or empty.
    :param commission: Commission rate.
    :param kwargs: Additional metadata; only ``data_source``, ``symbol``,
        ``timeframe`` and ``data_points`` are read.
    :return: BacktestResult instance.
    """
    # Tolerate absent collections so a failed run can still be recorded.
    stats = {} if stats is None else stats
    trades = [] if trades is None else trades
    equity_curve = [] if equity_curve is None else equity_curve

    def stat(key: str, default: Any = 0.0) -> Any:
        # Treat an explicit None the same as a missing key.
        value = stats.get(key)
        return default if value is None else value

    # Create metrics from stats dict
    metrics = BacktestMetrics(
        total_return=stat('total_return'),
        sharpe_ratio=stat('sharpe_ratio'),
        sortino_ratio=stat('sortino_ratio'),
        calmar_ratio=stat('calmar_ratio'),
        volatility=stat('volatility'),
        max_drawdown=stat('max_drawdown'),
        profit_factor=stat('profit_factor'),
        average_pnl=stat('average_pnl'),
        number_of_trades=stat('number_of_trades', len(trades)),
        win_loss_ratio=stat('win_loss_ratio'),
        max_consecutive_wins=stat('max_consecutive_wins', 0),
        max_consecutive_losses=stat('max_consecutive_losses', 0),
        win_rate=stat('win_rate'),
        loss_rate=stat('loss_rate'),
    )

    # Take one timestamp so defaulted dates and run_datetime agree exactly.
    created_at = datetime.now().isoformat()

    return BacktestResult(
        strategy_id=strategy_id,
        strategy_name=strategy_name,
        user_id=user_id,
        backtest_id=backtest_id,
        start_date=start_date or created_at,
        end_date=end_date or created_at,
        run_datetime=created_at,
        run_duration_seconds=run_duration,
        initial_capital=initial_capital,
        final_portfolio_value=final_value,
        commission_rate=commission,
        success=success,
        error_message=error_message,
        equity_curve=equity_curve,
        trades=trades,
        metrics=metrics,
        data_source=kwargs.get('data_source'),
        symbol=kwargs.get('symbol'),
        timeframe=kwargs.get('timeframe'),
        data_points=kwargs.get('data_points', len(equity_curve)),
    )

View File

@ -5,7 +5,8 @@ from typing import Union
import pandas as pd import pandas as pd
import pytz import pytz
epoch = dt.datetime.utcfromtimestamp(0).replace(tzinfo=pytz.UTC) # Unix epoch in UTC (timezone-aware)
epoch = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)
def query_uptodate(records: pd.DataFrame, r_length_min: float) -> Union[float, None]: def query_uptodate(records: pd.DataFrame, r_length_min: float) -> Union[float, None]:

View File

@ -0,0 +1,324 @@
"""
Tests for backtest determinism.
These tests ensure that running the same strategy with the same data
produces identical results every time.
"""
import pytest
from backtest_result import (
BacktestResult, BacktestMetrics, TradeResult,
create_backtest_result
)
class TestBacktestResult:
    """Covers construction and (de)serialization of BacktestResult."""

    def test_create_backtest_result(self):
        """The factory carries capital, curve, trades and stats through."""
        curve = [10000, 10200, 10100, 10500, 11000, 11500]
        trade_list = [
            {'ref': 1, 'pnl': 200, 'side': 'buy'},
            {'ref': 2, 'pnl': 300, 'side': 'buy'},
        ]
        stat_map = {
            'total_return': 15.0,
            'sharpe_ratio': 1.2,
            'max_drawdown': -5.0,
            'win_rate': 100.0,
            'number_of_trades': 2,
        }

        built = create_backtest_result(
            strategy_id='test-strategy-1',
            strategy_name='Test Strategy',
            user_id=1,
            backtest_id='bt-001',
            initial_capital=10000.0,
            final_value=11500.0,
            equity_curve=curve,
            trades=trade_list,
            stats=stat_map,
            run_duration=1.5,
        )

        assert built.success
        assert built.initial_capital == 10000.0
        assert built.final_portfolio_value == 11500.0
        assert len(built.equity_curve) == 6
        assert len(built.trades) == 2
        assert built.metrics.total_return == 15.0
        assert built.metrics.win_rate == 100.0

    def test_backtest_result_to_dict(self):
        """to_dict() yields a dict whose metrics entry is itself a dict."""
        built = create_backtest_result(
            strategy_id='test-1',
            strategy_name='Test',
            user_id=1,
            backtest_id='bt-001',
            initial_capital=10000,
            final_value=10500,
            equity_curve=[10000, 10500],
            trades=[],
            stats={'total_return': 5.0},
            run_duration=0.5,
        )

        as_mapping = built.to_dict()

        assert isinstance(as_mapping, dict)
        assert as_mapping['strategy_id'] == 'test-1'
        assert as_mapping['initial_capital'] == 10000
        assert isinstance(as_mapping['metrics'], dict)

    def test_backtest_result_to_json(self):
        """to_json() returns text that contains the strategy id."""
        built = create_backtest_result(
            strategy_id='test-1',
            strategy_name='Test',
            user_id=1,
            backtest_id='bt-001',
            initial_capital=10000,
            final_value=10500,
            equity_curve=[10000, 10500],
            trades=[],
            stats={},
            run_duration=0.5,
        )

        serialized = built.to_json()

        assert isinstance(serialized, str)
        assert 'test-1' in serialized

    def test_backtest_result_from_dict(self):
        """from_dict() rebuilds a result, including nested metrics."""
        metrics_part = {
            'total_return': 5.0,
            'number_of_trades': 0,
        }
        payload = {
            'strategy_id': 'test-1',
            'strategy_name': 'Test',
            'user_id': 1,
            'backtest_id': 'bt-001',
            'start_date': '2024-01-01T00:00:00',
            'end_date': '2024-01-31T00:00:00',
            'run_datetime': '2024-02-01T12:00:00',
            'run_duration_seconds': 1.5,
            'initial_capital': 10000,
            'final_portfolio_value': 10500,
            'commission_rate': 0.001,
            'success': True,
            'equity_curve': [10000, 10250, 10500],
            'trades': [],
            'metrics': metrics_part,
        }

        rebuilt = BacktestResult.from_dict(payload)

        assert rebuilt.strategy_id == 'test-1'
        assert rebuilt.initial_capital == 10000
        assert rebuilt.metrics.total_return == 5.0
class TestBacktestDeterminism:
    """Tests for verifying backtest determinism."""

    @staticmethod
    def _build(**overrides):
        """Create a result from a fixed baseline, applying keyword overrides."""
        params = dict(
            strategy_id='strategy-abc',
            strategy_name='Test Strategy',
            user_id=1,
            backtest_id='bt-001',
            initial_capital=10000.0,
            final_value=11000.0,
            equity_curve=[10000, 10500, 11000],
            trades=[
                {'ref': 1, 'pnl': 500, 'side': 'buy'},
                {'ref': 2, 'pnl': 500, 'side': 'sell'},
            ],
            stats={
                'total_return': 10.0,
                'number_of_trades': 2,
                'win_rate': 100.0,
            },
            run_duration=1.0,
        )
        params.update(overrides)
        return create_backtest_result(**params)

    def test_same_inputs_same_hash(self):
        """Test that identical inputs produce the same hash."""
        result1 = self._build()
        # Only the backtest id and the runtime differ.
        result2 = self._build(backtest_id='bt-002', run_duration=2.0)

        # Hashes should be identical despite different backtest_id and run_duration
        assert result1.get_determinism_hash() == result2.get_determinism_hash()

    def test_different_results_different_hash(self):
        """Test that different results produce different hashes."""
        result1 = self._build(
            strategy_name='Test',
            trades=[{'pnl': 1000}],
            stats={'total_return': 10.0, 'win_rate': 100.0, 'number_of_trades': 1},
        )
        result2 = self._build(
            strategy_name='Test',
            final_value=10500.0,  # Different final value
            equity_curve=[10000, 10250, 10500],  # Different curve
            trades=[{'pnl': 500}],  # Different PnL
            stats={'total_return': 5.0, 'win_rate': 100.0, 'number_of_trades': 1},
        )

        assert result1.get_determinism_hash() != result2.get_determinism_hash()

    def test_verify_determinism(self):
        """Test the verify_determinism method."""
        shared = dict(
            strategy_id='strategy-1',
            strategy_name='Test',
            initial_capital=10000,
            final_value=10500,
            equity_curve=[10000, 10250, 10500],
            trades=[],
            stats={'total_return': 5.0, 'number_of_trades': 0, 'win_rate': 0.0},
        )
        result1 = self._build(backtest_id='bt-001', run_duration=1.0, **shared)
        # Same result
        result2 = self._build(backtest_id='bt-002', run_duration=2.0, **shared)

        assert result1.verify_determinism(result2)

    def test_floating_point_precision(self):
        """Test that floating point precision doesn't break determinism."""
        trade = {'pnl': 500.123456}
        result1 = self._build(
            strategy_id='strategy-1',
            strategy_name='Test',
            backtest_id='bt-001',
            final_value=10500.123456,
            equity_curve=[10000.0, 10500.123456],
            trades=[dict(trade)],
            stats={'total_return': 5.001234, 'number_of_trades': 1, 'win_rate': 100.0},
        )
        # Same values plus noise far below the 6-decimal rounding used by
        # the hash. The trade PnL is kept identical on purpose: this test
        # targets the rounded value fields only.
        result2 = self._build(
            strategy_id='strategy-1',
            strategy_name='Test',
            backtest_id='bt-002',
            final_value=10500.1234560001,
            equity_curve=[10000.0, 10500.1234560001],
            trades=[dict(trade)],
            stats={'total_return': 5.0012340001, 'number_of_trades': 1, 'win_rate': 100.0},
        )

        # The raw inputs really are different floats...
        assert result1.final_portfolio_value != result2.final_portfolio_value
        assert result1.metrics.total_return != result2.metrics.total_return
        # ...but should still be equal due to rounding in hash
        assert result1.get_determinism_hash() == result2.get_determinism_hash()

        # A difference at the 6th decimal is significant and must change the hash.
        result3 = self._build(
            strategy_id='strategy-1',
            strategy_name='Test',
            backtest_id='bt-003',
            final_value=10500.123457,
            equity_curve=[10000.0, 10500.123457],
            trades=[dict(trade)],
            stats={'total_return': 5.001234, 'number_of_trades': 1, 'win_rate': 100.0},
        )
        assert result1.get_determinism_hash() != result3.get_determinism_hash()
class TestBacktestMetrics:
    """Checks BacktestMetrics defaults and its dictionary conversion."""

    def test_metrics_defaults(self):
        """A metrics object built without arguments reports zeros."""
        blank = BacktestMetrics()

        assert blank.total_return == 0.0
        assert blank.number_of_trades == 0
        assert blank.win_rate == 0.0

    def test_metrics_to_dict(self):
        """to_dict() exposes the values the metrics were created with."""
        populated = BacktestMetrics(
            total_return=15.5,
            sharpe_ratio=1.2,
            number_of_trades=10,
            win_rate=60.0,
        )

        as_mapping = populated.to_dict()

        assert as_mapping['total_return'] == 15.5
        assert as_mapping['number_of_trades'] == 10
class TestTradeResult:
    """Checks TradeResult construction and its dictionary conversion."""

    def test_trade_result_creation(self):
        """A fully specified trade keeps the values it was given."""
        closed_trade = TradeResult(
            ref=1,
            symbol='BTC/USDT',
            side='buy',
            open_datetime='2024-01-01T10:00:00',
            close_datetime='2024-01-01T12:00:00',
            size=0.1,
            open_price=50000,
            close_price=51000,
            pnl=100,
            pnlcomm=99,
            commission=1,
        )

        assert closed_trade.ref == 1
        assert closed_trade.symbol == 'BTC/USDT'
        assert closed_trade.pnl == 100

    def test_trade_result_to_dict(self):
        """An open trade serializes with close_datetime left as None."""
        open_trade = TradeResult(
            ref=1,
            symbol='BTC/USDT',
            side='buy',
            open_datetime='2024-01-01T10:00:00',
            close_datetime=None,
            size=0.1,
            open_price=50000,
            close_price=None,
            pnl=0,
            pnlcomm=0,
        )

        as_mapping = open_trade.to_dict()

        assert isinstance(as_mapping, dict)
        assert as_mapping['ref'] == 1
        assert as_mapping['close_datetime'] is None