"""
Backtest Result Schema for BrighterTrading.

Defines the standardized structure for backtest results to ensure
consistency and determinism across runs.
"""

import hashlib
import json
import math
from dataclasses import asdict, dataclass, field, fields
from datetime import datetime
from typing import Any, Dict, List, Optional

# Number of decimal places kept when a float is folded into the determinism hash.
_HASH_PRECISION = 6


def _canonical_float(value: Any) -> Optional[float]:
    """
    Normalize a number so numerically equal values serialize identically.

    Ints and floats are both emitted as floats (``10000`` and ``10000.0``
    would otherwise produce different JSON), values are rounded to
    ``_HASH_PRECISION`` places, and ``None`` is passed through unchanged.

    :param value: Number (or None) to normalize.
    :return: Rounded float, or None when ``value`` is None.
    """
    if value is None:
        return None
    # Adding 0.0 folds -0.0 into 0.0; json would otherwise emit "-0.0".
    return round(float(value), _HASH_PRECISION) + 0.0


@dataclass
class TradeResult:
    """Individual trade result."""
    ref: int  # Trade reference number
    symbol: str
    side: str  # 'buy' or 'sell'
    open_datetime: str  # ISO format
    close_datetime: Optional[str]  # ISO format, None if still open
    size: float
    open_price: float
    close_price: Optional[float]
    pnl: float  # Profit/loss
    pnlcomm: float  # P&L after commission
    commission: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the trade as a plain dictionary keyed by field name."""
        return asdict(self)


@dataclass
class BacktestMetrics:
    """Performance metrics from backtest. Every metric defaults to zero."""
    total_return: float = 0.0
    sharpe_ratio: float = 0.0
    sortino_ratio: float = 0.0
    calmar_ratio: float = 0.0
    volatility: float = 0.0
    max_drawdown: float = 0.0
    profit_factor: float = 0.0
    average_pnl: float = 0.0
    number_of_trades: int = 0
    win_loss_ratio: float = 0.0
    max_consecutive_wins: int = 0
    max_consecutive_losses: int = 0
    win_rate: float = 0.0
    loss_rate: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the metrics as a plain dictionary keyed by field name."""
        return asdict(self)


@dataclass
class BacktestResult:
    """
    Standardized backtest result schema.

    This schema ensures consistent results across runs for the same
    strategy and data, enabling determinism verification via
    :meth:`get_determinism_hash` and :meth:`verify_determinism`.
    """
    # Identification
    strategy_id: str
    strategy_name: str
    user_id: int
    backtest_id: str

    # Timing
    start_date: str  # ISO format
    end_date: str  # ISO format
    run_datetime: str  # When the backtest was run (ISO format)
    run_duration_seconds: float

    # Capital
    initial_capital: float
    final_portfolio_value: float
    commission_rate: float

    # Results
    success: bool
    error_message: Optional[str] = None

    # Data
    equity_curve: List[float] = field(default_factory=list)
    trades: List[Dict[str, Any]] = field(default_factory=list)
    metrics: BacktestMetrics = field(default_factory=BacktestMetrics)

    # Metadata for reproducibility
    data_source: Optional[str] = None
    symbol: Optional[str] = None
    timeframe: Optional[str] = None
    data_points: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary for serialization.

        :return: Dictionary keyed by field name; ``metrics`` is a nested dict.
        """
        result = asdict(self)
        # asdict() already expands a dataclass ``metrics``; this also covers
        # a non-dataclass object that only offers ``to_dict()``.
        if hasattr(self.metrics, 'to_dict'):
            result['metrics'] = self.metrics.to_dict()
        return result

    def to_json(self, indent: int = 2) -> str:
        """
        Convert to JSON string.

        :param indent: Indentation width passed to ``json.dumps``.
        :return: JSON text; values json cannot encode are stringified.
        """
        return json.dumps(self.to_dict(), indent=indent, default=str)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'BacktestResult':
        """
        Create from dictionary.

        The input dictionary is not modified; a ``metrics`` entry given as
        a dict is converted to :class:`BacktestMetrics` on a shallow copy.

        :param data: Mapping of field names to values, as from :meth:`to_dict`.
        :return: New BacktestResult instance.
        :raises TypeError: If required fields are missing or keys are unknown.
        """
        payload = dict(data)
        metrics = payload.get('metrics')
        if isinstance(metrics, dict):
            payload['metrics'] = BacktestMetrics(**metrics)
        return cls(**payload)

    def get_determinism_hash(self) -> str:
        """
        Generate a hash of the deterministic parts of the result.

        Covers strategy_id, initial capital, final portfolio value, the
        equity curve, the trade count, the summed trade pnl and the
        total_return / number_of_trades / win_rate metrics. Every other
        field (backtest_id, run_datetime, run_duration_seconds, ...) is
        excluded. Numeric values are normalized to floats rounded to six
        decimals so equal results hash equally however they were typed.

        :return: SHA-256 hex digest of the canonical JSON form.
        """
        # fsum avoids accumulated rounding error; a missing or None pnl
        # counts as zero.
        total_pnl = math.fsum(
            float(trade.get('pnl') or 0.0) for trade in self.trades
        )

        deterministic_data = {
            'strategy_id': self.strategy_id,
            'initial_capital': _canonical_float(self.initial_capital),
            'final_portfolio_value': _canonical_float(self.final_portfolio_value),
            'equity_curve': [_canonical_float(v) for v in self.equity_curve],
            'trades_count': len(self.trades),
            'trades_pnl': _canonical_float(total_pnl),
            'metrics': {
                'total_return': _canonical_float(self.metrics.total_return),
                'number_of_trades': self.metrics.number_of_trades,
                'win_rate': _canonical_float(self.metrics.win_rate),
            },
        }

        # sort_keys makes the serialized form independent of dict order.
        json_str = json.dumps(deterministic_data, sort_keys=True)
        return hashlib.sha256(json_str.encode()).hexdigest()

    def verify_determinism(self, other: 'BacktestResult') -> bool:
        """
        Verify that another backtest result is deterministically equivalent.

        :param other: Another backtest result to compare.
        :return: True if both determinism hashes match; False otherwise,
            including when ``other`` is not a BacktestResult.
        """
        if not isinstance(other, BacktestResult):
            return False
        return self.get_determinism_hash() == other.get_determinism_hash()


def create_backtest_result(
    strategy_id: str,
    strategy_name: str,
    user_id: int,
    backtest_id: str,
    initial_capital: float,
    final_value: float,
    equity_curve: List[float],
    trades: List[Dict[str, Any]],
    stats: Dict[str, Any],
    run_duration: float,
    success: bool = True,
    error_message: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    commission: float = 0.001,
    **kwargs
) -> BacktestResult:
    """
    Factory function to create a BacktestResult from raw backtest output.

    :param strategy_id: Strategy identifier.
    :param strategy_name: Strategy name.
    :param user_id: User identifier.
    :param backtest_id: Unique backtest identifier.
    :param initial_capital: Starting capital.
    :param final_value: Final portfolio value.
    :param equity_curve: List of portfolio values over time (None -> empty).
    :param trades: List of trade dictionaries (None -> empty).
    :param stats: Dictionary of performance metrics (None -> empty). Keys
        matching BacktestMetrics fields are used; other keys are ignored.
        ``number_of_trades`` falls back to ``len(trades)``.
    :param run_duration: Backtest runtime in seconds.
    :param success: Whether backtest succeeded.
    :param error_message: Error message if failed.
    :param start_date: Backtest start date (ISO format); defaults to now.
    :param end_date: Backtest end date (ISO format); defaults to now.
    :param commission: Commission rate.
    :param kwargs: Additional metadata. Only ``data_source``, ``symbol``,
        ``timeframe`` and ``data_points`` are read; ``data_points`` falls
        back to ``len(equity_curve)``.
    :return: BacktestResult instance.
    """
    # A failed run may have nothing to report; treat None as empty. Copying
    # also decouples the result from later mutation of the caller's lists.
    equity_curve = list(equity_curve) if equity_curve is not None else []
    trades = list(trades) if trades is not None else []
    stats = stats if stats is not None else {}

    # Take each metric from stats when present; the dataclass defaults
    # cover the rest.
    metric_values = {
        f.name: stats[f.name]
        for f in fields(BacktestMetrics)
        if f.name in stats
    }
    metric_values.setdefault('number_of_trades', len(trades))
    metrics = BacktestMetrics(**metric_values)

    # One timestamp for every "now" fallback so the fields agree.
    # NOTE(review): naive local time, kept for format compatibility -
    # confirm whether consumers expect UTC.
    now_iso = datetime.now().isoformat()

    return BacktestResult(
        strategy_id=strategy_id,
        strategy_name=strategy_name,
        user_id=user_id,
        backtest_id=backtest_id,
        start_date=start_date or now_iso,
        end_date=end_date or now_iso,
        run_datetime=now_iso,
        run_duration_seconds=run_duration,
        initial_capital=initial_capital,
        final_portfolio_value=final_value,
        commission_rate=commission,
        success=success,
        error_message=error_message,
        equity_curve=equity_curve,
        trades=trades,
        metrics=metrics,
        data_source=kwargs.get('data_source'),
        symbol=kwargs.get('symbol'),
        timeframe=kwargs.get('timeframe'),
        data_points=kwargs.get('data_points', len(equity_curve)),
    )
"""
Tests for backtest determinism.

These tests ensure that running the same strategy with the same data
produces identical results every time.
"""
import pytest
from backtest_result import (
    BacktestResult, BacktestMetrics, TradeResult,
    create_backtest_result
)


def _make_result(**overrides):
    """
    Build a BacktestResult through the factory.

    Starts from a small baseline run and applies ``overrides`` on top, so
    each test spells out only the arguments it cares about.
    """
    params = dict(
        strategy_id='strategy-1',
        strategy_name='Test',
        user_id=1,
        backtest_id='bt-001',
        initial_capital=10000.0,
        final_value=10500.0,
        equity_curve=[10000, 10250, 10500],
        trades=[],
        stats={},
        run_duration=1.0,
    )
    params.update(overrides)
    return create_backtest_result(**params)


class TestBacktestResult:
    """Tests for BacktestResult schema."""

    def test_create_backtest_result(self):
        """Test creating a backtest result."""
        result = _make_result(
            strategy_id='test-strategy-1',
            strategy_name='Test Strategy',
            final_value=11500.0,
            equity_curve=[10000, 10200, 10100, 10500, 11000, 11500],
            trades=[
                {'ref': 1, 'pnl': 200, 'side': 'buy'},
                {'ref': 2, 'pnl': 300, 'side': 'buy'},
            ],
            stats={
                'total_return': 15.0,
                'sharpe_ratio': 1.2,
                'max_drawdown': -5.0,
                'win_rate': 100.0,
                'number_of_trades': 2,
            },
            run_duration=1.5,
        )

        assert result.success
        assert result.initial_capital == 10000.0
        assert result.final_portfolio_value == 11500.0
        assert len(result.equity_curve) == 6
        assert len(result.trades) == 2
        assert result.metrics.total_return == 15.0
        assert result.metrics.win_rate == 100.0

    def test_backtest_result_to_dict(self):
        """Test converting result to dictionary."""
        result = _make_result(
            strategy_id='test-1',
            initial_capital=10000,
            final_value=10500,
            equity_curve=[10000, 10500],
            stats={'total_return': 5.0},
            run_duration=0.5,
        )

        d = result.to_dict()
        assert isinstance(d, dict)
        assert d['strategy_id'] == 'test-1'
        assert d['initial_capital'] == 10000
        assert isinstance(d['metrics'], dict)

    def test_backtest_result_to_json(self):
        """Test JSON serialization."""
        result = _make_result(
            strategy_id='test-1',
            initial_capital=10000,
            final_value=10500,
            equity_curve=[10000, 10500],
            run_duration=0.5,
        )

        json_str = result.to_json()
        assert isinstance(json_str, str)
        assert 'test-1' in json_str

    def test_backtest_result_from_dict(self):
        """Test creating result from dictionary."""
        data = {
            'strategy_id': 'test-1',
            'strategy_name': 'Test',
            'user_id': 1,
            'backtest_id': 'bt-001',
            'start_date': '2024-01-01T00:00:00',
            'end_date': '2024-01-31T00:00:00',
            'run_datetime': '2024-02-01T12:00:00',
            'run_duration_seconds': 1.5,
            'initial_capital': 10000,
            'final_portfolio_value': 10500,
            'commission_rate': 0.001,
            'success': True,
            'equity_curve': [10000, 10250, 10500],
            'trades': [],
            'metrics': {
                'total_return': 5.0,
                'number_of_trades': 0,
            }
        }

        result = BacktestResult.from_dict(data)
        assert result.strategy_id == 'test-1'
        assert result.initial_capital == 10000
        assert result.metrics.total_return == 5.0


class TestBacktestDeterminism:
    """Tests for verifying backtest determinism."""

    def test_same_inputs_same_hash(self):
        """Test that identical inputs produce the same hash."""
        shared = dict(
            strategy_id='strategy-abc',
            strategy_name='Test Strategy',
            final_value=11000.0,
            equity_curve=[10000, 10500, 11000],
            trades=[
                {'ref': 1, 'pnl': 500, 'side': 'buy'},
                {'ref': 2, 'pnl': 500, 'side': 'sell'},
            ],
            stats={
                'total_return': 10.0,
                'number_of_trades': 2,
                'win_rate': 100.0,
            },
        )
        result1 = _make_result(backtest_id='bt-001', run_duration=1.0, **shared)
        # Different ID and runtime: neither is part of the hash.
        result2 = _make_result(backtest_id='bt-002', run_duration=2.0, **shared)

        # Hashes should be identical despite different backtest_id and run_duration
        assert result1.get_determinism_hash() == result2.get_determinism_hash()

    def test_different_results_different_hash(self):
        """Test that different results produce different hashes."""
        result1 = _make_result(
            strategy_id='strategy-abc',
            final_value=11000.0,
            equity_curve=[10000, 10500, 11000],
            trades=[{'pnl': 1000}],
            stats={'total_return': 10.0, 'win_rate': 100.0, 'number_of_trades': 1},
        )
        result2 = _make_result(
            strategy_id='strategy-abc',
            final_value=10500.0,  # Different final value
            equity_curve=[10000, 10250, 10500],  # Different curve
            trades=[{'pnl': 500}],  # Different PnL
            stats={'total_return': 5.0, 'win_rate': 100.0, 'number_of_trades': 1},
        )

        assert result1.get_determinism_hash() != result2.get_determinism_hash()

    def test_verify_determinism(self):
        """Test the verify_determinism method."""
        shared = dict(
            initial_capital=10000,
            final_value=10500,
            stats={'total_return': 5.0, 'number_of_trades': 0, 'win_rate': 0.0},
        )
        result1 = _make_result(backtest_id='bt-001', run_duration=1.0, **shared)
        # Same result, different run
        result2 = _make_result(backtest_id='bt-002', run_duration=2.0, **shared)

        assert result1.verify_determinism(result2)

    def test_floating_point_precision(self):
        """Test that floating point precision doesn't break determinism."""

        def run(backtest_id, final_value, total_return):
            return _make_result(
                backtest_id=backtest_id,
                final_value=final_value,
                equity_curve=[10000.0, final_value],
                trades=[{'pnl': 500.123456}],
                stats={
                    'total_return': total_return,
                    'number_of_trades': 1,
                    'win_rate': 100.0,
                },
            )

        # Values that differ only beyond the 6th decimal place, i.e. below
        # the rounding applied when the hash is computed.
        result1 = run('bt-001', 10500.1234561, 5.0012341)
        result2 = run('bt-002', 10500.1234562, 5.0012342)

        # Should still be equal due to rounding in hash
        assert result1.get_determinism_hash() == result2.get_determinism_hash()

        # A difference at the 6th decimal place must still be detected.
        result3 = run('bt-003', 10500.123457, 5.0012341)
        assert result1.get_determinism_hash() != result3.get_determinism_hash()


class TestBacktestMetrics:
    """Tests for BacktestMetrics."""

    def test_metrics_defaults(self):
        """Test that metrics have sensible defaults."""
        metrics = BacktestMetrics()
        assert metrics.total_return == 0.0
        assert metrics.number_of_trades == 0
        assert metrics.win_rate == 0.0

    def test_metrics_to_dict(self):
        """Test metrics conversion to dict."""
        metrics = BacktestMetrics(
            total_return=15.5,
            sharpe_ratio=1.2,
            number_of_trades=10,
            win_rate=60.0,
        )
        d = metrics.to_dict()
        assert d['total_return'] == 15.5
        assert d['number_of_trades'] == 10


class TestTradeResult:
    """Tests for TradeResult."""

    def test_trade_result_creation(self):
        """Test creating a trade result."""
        trade = TradeResult(
            ref=1,
            symbol='BTC/USDT',
            side='buy',
            open_datetime='2024-01-01T10:00:00',
            close_datetime='2024-01-01T12:00:00',
            size=0.1,
            open_price=50000,
            close_price=51000,
            pnl=100,
            pnlcomm=99,
            commission=1,
        )

        assert trade.ref == 1
        assert trade.symbol == 'BTC/USDT'
        assert trade.pnl == 100

    def test_trade_result_to_dict(self):
        """Test trade result conversion to dict."""
        trade = TradeResult(
            ref=1,
            symbol='BTC/USDT',
            side='buy',
            open_datetime='2024-01-01T10:00:00',
            close_datetime=None,
            size=0.1,
            open_price=50000,
            close_price=None,
            pnl=0,
            pnlcomm=0,
        )

        d = trade.to_dict()
        assert isinstance(d, dict)
        assert d['ref'] == 1
        assert d['close_datetime'] is None