# exchange-data-manager/tests/test_cache.py
#
# 598 lines
# 22 KiB
# Python

"""Tests for cache components."""
import pytest
import tempfile
import os
from exchange_data_manager.candles.models import Candle, CandleRequest
from exchange_data_manager.cache.memory import MemoryCache
from exchange_data_manager.cache.database import DatabaseCache
class TestMemoryCache:
    """Behavioural tests for ``MemoryCache``: storing, reading, merging and gap reporting.

    Every ``MemoryCache.get`` call in this class is unpacked as a
    ``(candles, gaps)`` pair.
    """

    # Cache key used by every test in this class.
    KEY = "binance:BTC/USDT:1m"
    # Timestamp of the first fixture candle; fixtures space candles 60 seconds apart.
    T0 = 1709337600

    @staticmethod
    def _flat(ts):
        """Return a candle at ``ts`` with every price at 50000.0 and volume 1.0."""
        return Candle(time=ts, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0)

    def test_create_cache(self):
        """Constructor arguments can be read back as attributes."""
        memory = MemoryCache(max_candles=100, ttl_seconds=60)

        assert memory.max_candles == 100
        assert memory.ttl_seconds == 60

    def test_put_and_get_candles(self):
        """Candles stored under a key can be read back; first and last times are checked."""
        memory = MemoryCache()
        first = Candle(time=self.T0, open=50000.0, high=50100.0, low=49900.0, close=50050.0, volume=10.0)
        second = Candle(time=self.T0 + 60, open=50050.0, high=50200.0, low=50000.0, close=50150.0, volume=15.0)
        third = Candle(time=self.T0 + 120, open=50150.0, high=50300.0, low=50100.0, close=50250.0, volume=12.0)
        memory.put(self.KEY, [first, second, third])

        fetched, _ = memory.get(self.KEY)

        assert len(fetched) == 3
        assert fetched[0].time == 1709337600
        assert fetched[2].time == 1709337720

    def test_get_nonexistent_returns_empty(self):
        """Reading a key that was never stored yields an empty candle list."""
        fetched, _ = MemoryCache().get(self.KEY)

        assert fetched == []

    def test_get_with_time_range(self):
        """A start/end query returns only the candles inside the (inclusive) window."""
        memory = MemoryCache()
        stored = [
            Candle(time=self.T0, open=50000.0, high=50100.0, low=49900.0, close=50050.0, volume=10.0),
            Candle(time=self.T0 + 60, open=50050.0, high=50200.0, low=50000.0, close=50150.0, volume=15.0),
            Candle(time=self.T0 + 120, open=50150.0, high=50300.0, low=50100.0, close=50250.0, volume=12.0),
            Candle(time=self.T0 + 180, open=50250.0, high=50400.0, low=50200.0, close=50350.0, volume=8.0),
        ]
        memory.put(self.KEY, stored)

        # Ask for the two middle candles only.
        window, _ = memory.get(self.KEY, start=self.T0 + 60, end=self.T0 + 120)

        assert len(window) == 2
        assert window[0].time == 1709337660
        assert window[1].time == 1709337720

    def test_clear(self):
        """After ``clear()`` a previously stored key reads back empty."""
        memory = MemoryCache()
        memory.put(self.KEY, [self._flat(self.T0)])

        memory.clear()

        fetched, _ = memory.get(self.KEY)
        assert fetched == []

    def test_stats(self):
        """``stats()`` reports one entry holding two candles."""
        memory = MemoryCache()
        memory.put(self.KEY, [self._flat(self.T0), self._flat(self.T0 + 60)])

        report = memory.stats()

        assert report["num_entries"] == 1
        assert report["total_candles"] == 2

    def test_update_candle(self):
        """``update_candle`` replaces the stored candle that has the same timestamp."""
        memory = MemoryCache()
        memory.put(self.KEY, [self._flat(self.T0)])

        # Same timestamp, different high/low/close/volume.
        replacement = Candle(time=self.T0, open=50000.0, high=51000.0, low=49000.0, close=50500.0, volume=10.0)
        memory.update_candle(self.KEY, replacement)

        fetched, _ = memory.get(self.KEY)
        assert len(fetched) == 1
        assert fetched[0].high == 51000.0
        assert fetched[0].volume == 10.0

    def test_merge_candles(self):
        """A second ``put`` merges into existing data: overlaps are overwritten, new times appended."""
        memory = MemoryCache()

        # Initial batch: two candles.
        memory.put(self.KEY, [self._flat(self.T0), self._flat(self.T0 + 60)])

        # Follow-up batch: one overlapping timestamp (new values) and one new timestamp.
        overlapping = Candle(time=self.T0 + 60, open=51000.0, high=51000.0, low=51000.0, close=51000.0, volume=2.0)
        memory.put(self.KEY, [overlapping, self._flat(self.T0 + 120)])

        fetched, _ = memory.get(self.KEY)
        assert len(fetched) == 3
        # The overlapping candle must carry the values from the second batch.
        assert fetched[1].open == 51000.0
        assert fetched[1].volume == 2.0

    def test_single_candle_gap_detection(self):
        """One missing candle in the middle of the range is reported as a single gap."""
        memory = MemoryCache()
        # Offsets 0 and 120 seconds are stored; the candle at offset 60 is left out.
        memory.put(self.KEY, [self._flat(self.T0), self._flat(self.T0 + 120)])

        fetched, gaps = memory.get(self.KEY, start=self.T0, end=self.T0 + 120)

        assert len(fetched) == 2
        # The gap starts and ends on the one missing timestamp.
        assert len(gaps) == 1
        assert gaps[0] == (1709337660, 1709337660)

    def test_single_candle_end_gap_detection(self):
        """One missing candle at the end of the requested range is reported as a gap."""
        memory = MemoryCache()
        memory.put(self.KEY, [self._flat(self.T0 + offset) for offset in (0, 60, 120)])

        # The requested range extends one interval past the last stored candle.
        fetched, gaps = memory.get(self.KEY, start=self.T0, end=self.T0 + 180)

        assert len(fetched) == 3
        assert len(gaps) == 1
        assert gaps[0] == (1709337780, 1709337780)
class TestDatabaseCache:
    """Tests for DatabaseCache class.

    ``DatabaseCache.put`` and ``get`` are addressed by separate
    ``(exchange, symbol, timeframe)`` arguments, and ``get`` is unpacked as a
    ``(candles, gaps)`` pair throughout.
    """

    @pytest.fixture
    def db_cache(self, tmp_path):
        """Provide a ``DatabaseCache`` backed by a fresh database path per test.

        The path lives under pytest's ``tmp_path`` directory, so pytest owns
        its lifecycle. Nothing is left behind if constructing the cache
        raises, and any extra files the backend writes next to the database
        stay inside the managed directory.
        """
        return DatabaseCache(db_path=str(tmp_path / "test_cache.db"))

    def test_put_and_get_candles(self, db_cache):
        """Test storing and retrieving candles."""
        candles = [
            Candle(time=1709337600, open=50000.0, high=50100.0, low=49900.0, close=50050.0, volume=10.0),
            Candle(time=1709337660, open=50050.0, high=50200.0, low=50000.0, close=50150.0, volume=15.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles)

        result, _ = db_cache.get("binance", "BTC/USDT", "1m")

        assert len(result) == 2
        assert result[0].time == 1709337600
        assert result[1].time == 1709337660

    def test_get_empty_returns_empty_list(self, db_cache):
        """Test that getting non-existent data returns empty list."""
        result, _ = db_cache.get("binance", "BTC/USDT", "1m")

        assert result == []

    def test_get_with_time_range(self, db_cache):
        """Test getting candles within an inclusive start/end time range."""
        candles = [
            Candle(time=1709337600, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=1709337660, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=1709337720, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=1709337780, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles)

        result, _ = db_cache.get(
            "binance", "BTC/USDT", "1m",
            start=1709337660,
            end=1709337720,
        )

        assert len(result) == 2
        assert result[0].time == 1709337660
        assert result[1].time == 1709337720

    def test_upsert_updates_existing(self, db_cache):
        """Test that storing duplicate timestamps updates existing records."""
        candles1 = [
            Candle(time=1709337600, open=50000.0, high=50100.0, low=49900.0, close=50050.0, volume=10.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles1)

        # Store updated candle with same timestamp
        candles2 = [
            Candle(time=1709337600, open=51000.0, high=51100.0, low=50900.0, close=51050.0, volume=20.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles2)

        result, _ = db_cache.get("binance", "BTC/USDT", "1m")

        # Should only have one candle with updated values
        assert len(result) == 1
        assert result[0].open == 51000.0
        assert result[0].volume == 20.0

    def test_delete_all(self, db_cache):
        """Test that ``delete()`` without filters removes data for every symbol."""
        candles = [
            Candle(time=1709337600, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles)
        db_cache.put("binance", "ETH/USDT", "1m", candles)

        db_cache.delete()

        result1, _ = db_cache.get("binance", "BTC/USDT", "1m")
        result2, _ = db_cache.get("binance", "ETH/USDT", "1m")
        assert result1 == []
        assert result2 == []

    def test_delete_filtered(self, db_cache):
        """Test that ``delete`` with exchange/symbol filters leaves other symbols intact."""
        candles = [
            Candle(time=1709337600, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles)
        db_cache.put("binance", "ETH/USDT", "1m", candles)

        # Only delete BTC
        db_cache.delete(exchange="binance", symbol="BTC/USDT")

        result1, _ = db_cache.get("binance", "BTC/USDT", "1m")
        result2, _ = db_cache.get("binance", "ETH/USDT", "1m")
        assert result1 == []
        assert len(result2) == 1

    def test_stats(self, db_cache):
        """Test database statistics: one entry holding two candles."""
        candles = [
            Candle(time=1709337600, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=1709337660, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles)

        stats = db_cache.stats()

        assert stats["total_candles"] == 2
        assert stats["num_entries"] == 1

    def test_get_time_range(self, db_cache):
        """Test getting the (min, max) time range of cached data."""
        candles = [
            Candle(time=1709337600, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=1709337660, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=1709337720, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles)

        time_range = db_cache.get_time_range("binance", "BTC/USDT", "1m")

        assert time_range is not None
        assert time_range[0] == 1709337600  # min
        assert time_range[1] == 1709337720  # max

    def test_get_time_range_no_data(self, db_cache):
        """Test that the time range is ``None`` when no data exists."""
        time_range = db_cache.get_time_range("binance", "BTC/USDT", "1m")

        assert time_range is None

    def test_single_candle_gap_detection(self, db_cache):
        """Test that a single missing candle is detected as a gap."""
        # 1m candles at offsets 0 and 120 seconds; the one at offset 60 is missing.
        candles = [
            Candle(time=1709337600, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            # Missing: time=1709337660
            Candle(time=1709337720, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles)

        result, gaps = db_cache.get(
            "binance", "BTC/USDT", "1m",
            start=1709337600,
            end=1709337720,
        )

        assert len(result) == 2
        # Should detect the gap for the single missing candle
        assert len(gaps) == 1
        assert gaps[0] == (1709337660, 1709337660)  # Single candle gap

    def test_single_candle_end_gap_detection(self, db_cache):
        """Test that a single missing candle at the end is detected as a gap."""
        candles = [
            Candle(time=1709337600, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=1709337660, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=1709337720, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        db_cache.put("binance", "BTC/USDT", "1m", candles)

        # The requested range ends one interval after the last stored candle.
        result, gaps = db_cache.get(
            "binance", "BTC/USDT", "1m",
            start=1709337600,
            end=1709337780,
        )

        assert len(result) == 3
        assert len(gaps) == 1
        assert gaps[0] == (1709337780, 1709337780)  # Single candle end gap
class TestMemoryCacheBinarySearch:
    """Range-query and ordered-insert checks for ``MemoryCache``.

    The class is named after the binary-search lookups these code paths are
    meant to use; the tests assert only the observable results.
    """

    # Cache key used by all tests except the boundary-condition test.
    KEY = "binance:BTC/USDT:1m"
    # Timestamp of the first fixture candle; fixtures space candles 60 seconds apart.
    T0 = 1709337600

    @staticmethod
    def _flat(ts):
        """Return a candle at ``ts`` with every price at 50000.0 and volume 1.0."""
        return Candle(time=ts, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0)

    def test_binary_search_range_query_large_dataset(self):
        """A mid-range query over 1000 candles returns exactly the inclusive window."""
        memory = MemoryCache()

        # 1000 consecutive 1m candles whose prices drift upward by the index.
        series = []
        for i in range(1000):
            series.append(
                Candle(
                    time=self.T0 + i * 60,
                    open=50000.0 + i,
                    high=50100.0 + i,
                    low=49900.0 + i,
                    close=50050.0 + i,
                    volume=10.0,
                )
            )
        memory.put(self.KEY, series)

        # Window from candle index 100 through candle index 200.
        lower = self.T0 + 100 * 60
        upper = self.T0 + 200 * 60
        window, _ = memory.get(self.KEY, start=lower, end=upper)

        assert len(window) == 101  # both endpoints are included
        assert window[0].time == lower
        assert window[-1].time == upper

    def test_binary_search_update_maintains_order(self):
        """``update_candle`` with a new in-between timestamp keeps results time-ordered."""
        memory = MemoryCache()
        memory.put(self.KEY, [self._flat(self.T0), self._flat(self.T0 + 120)])

        # This timestamp falls between the two stored candles.
        between = Candle(time=self.T0 + 60, open=51000.0, high=51000.0, low=51000.0, close=51000.0, volume=2.0)
        memory.update_candle(self.KEY, between)

        fetched, _ = memory.get(self.KEY)

        assert len(fetched) == 3
        assert fetched[0].time == 1709337600
        assert fetched[1].time == 1709337660
        assert fetched[2].time == 1709337720

    def test_binary_search_finds_exact_candle_for_update(self):
        """``update_candle`` on an existing timestamp overwrites it instead of adding a candle."""
        memory = MemoryCache()
        memory.put(self.KEY, [self._flat(self.T0 + offset) for offset in (0, 60, 120)])

        # Same timestamp as the middle candle, with distinctive values.
        replacement = Candle(time=self.T0 + 60, open=99999.0, high=99999.0, low=99999.0, close=99999.0, volume=99.0)
        memory.update_candle(self.KEY, replacement)

        fetched, _ = memory.get(self.KEY)

        assert len(fetched) == 3  # count unchanged
        assert fetched[1].open == 99999.0

    def test_binary_search_empty_range_result(self):
        """A window entirely after the stored data yields no candles and one gap."""
        memory = MemoryCache()
        memory.put(self.KEY, [self._flat(self.T0), self._flat(self.T0 + 60)])

        # Both bounds lie past the last stored candle.
        fetched, gaps = memory.get(self.KEY, start=1709338000, end=1709339000)

        assert len(fetched) == 0
        assert len(gaps) == 1

    def test_binary_search_boundary_conditions(self):
        """Exact-edge and out-of-range windows return one and zero candles respectively."""
        memory = MemoryCache()
        memory.put(
            "test:TEST:1m",
            [
                Candle(time=ts, open=price, high=price, low=price, close=price, volume=1.0)
                for ts, price in ((100, 1.0), (200, 2.0), (300, 3.0))
            ],
        )

        # A window collapsed onto the first, then the last, stored timestamp.
        for edge in (100, 300):
            hit, _ = memory.get("test:TEST:1m", start=edge, end=edge)
            assert len(hit) == 1
            assert hit[0].time == edge

        # A window wholly before, then wholly after, the stored data.
        for lower, upper in ((50, 99), (301, 400)):
            miss, _ = memory.get("test:TEST:1m", start=lower, end=upper)
            assert len(miss) == 0
class TestFreshnessCheck:
    """Tests for stale data detection in limit-only requests.

    A "limit-only" request is a ``MemoryCache.get`` call with ``limit`` but
    no ``start``/``end``. These tests expect such a request to report a gap
    when the newest cached candle is old relative to the wall clock.
    NOTE(review): the exact staleness threshold is not visible here; the
    fixtures assume roughly two candle intervals. Confirm against MemoryCache.
    """

    def test_memory_cache_detects_stale_data_limit_only(self):
        """Test that memory cache detects stale data for limit-only requests."""
        import time as time_module

        cache = MemoryCache()

        # Create candles that are 10 minutes old (stale for 1m timeframe)
        now = int(time_module.time())
        old_time = now - 600  # 10 minutes ago
        candles = [
            Candle(time=old_time - 120, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=old_time - 60, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=old_time, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        cache.put("binance:BTC/USDT:1m", candles)

        # Limit-only request (no start/end) - should detect staleness
        result, gaps = cache.get("binance:BTC/USDT:1m", limit=100)

        assert len(result) == 3
        # Should have a gap indicating data is stale and needs refresh
        assert len(gaps) == 1
        # Gap should be from after last candle to "now"
        assert gaps[0][0] == old_time + 60  # Start after last candle
        assert gaps[0][1] >= now - 5  # End should be close to now (within 5s tolerance)

    def test_memory_cache_fresh_data_no_gaps_limit_only(self):
        """Test that fresh data returns no gaps for limit-only requests."""
        import time as time_module

        cache = MemoryCache()

        # Create candles that are current (within 2 intervals)
        now = int(time_module.time())
        # Align to 1m boundary
        aligned_now = now - (now % 60)
        candles = [
            Candle(time=aligned_now - 120, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=aligned_now - 60, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=aligned_now, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        cache.put("binance:BTC/USDT:1m", candles)

        # Limit-only request - fresh data should have no gaps
        result, gaps = cache.get("binance:BTC/USDT:1m", limit=100)

        assert len(result) == 3
        assert len(gaps) == 0  # No staleness gap

    def test_memory_cache_stale_5m_timeframe(self):
        """Test staleness detection for 5m timeframe."""
        import time as time_module

        cache = MemoryCache()

        # Create 5m candles that are 30 minutes old; the fixture assumes this
        # exceeds the staleness threshold (presumably 2 * 300s = 600s).
        now = int(time_module.time())
        old_time = now - 1800  # 30 minutes ago
        candles = [
            Candle(time=old_time - 600, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=old_time - 300, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=old_time, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        cache.put("binance:BTC/USDT:5m", candles)

        # Limit-only request - should detect staleness
        result, gaps = cache.get("binance:BTC/USDT:5m", limit=100)

        assert len(result) == 3
        assert len(gaps) == 1  # Staleness gap detected

    def test_range_request_not_affected_by_freshness_check(self):
        """Test that range requests (with start/end) are not affected by freshness check."""
        cache = MemoryCache()

        # Create old candles; a fixed timestamp is enough because the wall
        # clock is never consulted by this test.
        old_time = 1709337600  # Fixed old timestamp
        candles = [
            Candle(time=old_time, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=old_time + 60, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
            Candle(time=old_time + 120, open=50000.0, high=50000.0, low=50000.0, close=50000.0, volume=1.0),
        ]
        cache.put("binance:BTC/USDT:1m", candles)

        # Range request (with start/end) - freshness check should NOT apply
        result, gaps = cache.get(
            "binance:BTC/USDT:1m",
            start=old_time,
            end=old_time + 120,
        )

        assert len(result) == 3
        # No gaps because data covers the requested range completely
        assert len(gaps) == 0