exchange-data-manager/src/exchange_data_manager/cache/database.py

344 lines
11 KiB
Python

"""
SQLite database cache for persistent candle storage.
"""
import sqlite3
import asyncio
from pathlib import Path
from typing import List, Optional, Tuple
from contextlib import contextmanager
import logging
from ..candles.models import Candle
logger = logging.getLogger(__name__)
class DatabaseCache:
    """
    SQLite-based persistent cache for candle data.

    Provides the second tier of the caching system (memory → database → exchange).

    Every public method opens its own short-lived connection, so no connection
    is held on the instance. Keys are normalised before they reach the
    database: ``exchange`` and ``timeframe`` are lower-cased and ``symbol`` is
    upper-cased (see ``_key``).
    """

    def __init__(self, db_path: str = "./data/candles.db") -> None:
        """
        Initialize database cache.

        Creates the parent directory of ``db_path`` and the ``candles`` schema
        if they do not exist yet.

        Args:
            db_path: Path to SQLite database file
        """
        self.db_path = db_path
        self._ensure_directory()
        self._init_db()

    def _ensure_directory(self) -> None:
        """Ensure the database directory exists."""
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)

    def _init_db(self) -> None:
        """Create the ``candles`` table and its indexes if they are missing."""
        with self._get_connection() as conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS candles (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    exchange TEXT NOT NULL,
                    symbol TEXT NOT NULL,
                    timeframe TEXT NOT NULL,
                    time INTEGER NOT NULL,
                    open REAL NOT NULL,
                    high REAL NOT NULL,
                    low REAL NOT NULL,
                    close REAL NOT NULL,
                    volume REAL NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(exchange, symbol, timeframe, time)
                )
                """
            )
            # NOTE(review): SQLite backs the UNIQUE constraint above with its
            # own index on the same columns, so the two explicit indexes below
            # are probably redundant. They are kept so that new databases get
            # the same schema as existing ones.
            # Index for fast lookups
            conn.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_candles_lookup
                ON candles(exchange, symbol, timeframe, time)
                """
            )
            # Index for time range queries
            conn.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_candles_time_range
                ON candles(exchange, symbol, timeframe, time DESC)
                """
            )
            conn.commit()

    @contextmanager
    def _get_connection(self):
        """
        Yield a fresh ``sqlite3`` connection and close it on exit.

        Rows are returned as ``sqlite3.Row`` so columns can be read by name.
        The connection is closed without committing; callers that write must
        call ``commit()`` themselves.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        try:
            yield conn
        finally:
            conn.close()

    @staticmethod
    def _key(exchange: str, symbol: str, timeframe: str) -> Tuple[str, str, str]:
        """
        Return the normalised ``(exchange, symbol, timeframe)`` storage key.

        NOTE(review): lower-casing the timeframe makes "1M" and "1m" share one
        key — confirm no caller relies on case to tell month from minute.
        """
        return exchange.lower(), symbol.upper(), timeframe.lower()

    def get(
        self,
        exchange: str,
        symbol: str,
        timeframe: str,
        start: Optional[int] = None,
        end: Optional[int] = None,
        limit: Optional[int] = None,
    ) -> Tuple[List["Candle"], List[Tuple[int, int]]]:
        """
        Get candles from database.

        Args:
            exchange: Exchange name
            symbol: Trading pair symbol
            timeframe: Candle timeframe
            start: Start timestamp (inclusive)
            end: End timestamp (inclusive)
            limit: Maximum number of candles to return. When given, the most
                recent ``limit`` candles inside the window are selected.

        Returns:
            Tuple of (found_candles, missing_ranges). Candles are always in
            ascending time order and are all marked ``closed=True``.
        """
        # Build the WHERE clause once; only fixed fragments are concatenated,
        # every value goes through a bound parameter.
        where = ["exchange = ?", "symbol = ?", "timeframe = ?"]
        params: list = list(self._key(exchange, symbol, timeframe))
        if start is not None:
            where.append("time >= ?")
            params.append(start)
        if end is not None:
            where.append("time <= ?")
            params.append(end)

        select = (
            "SELECT time, open, high, low, close, volume "
            "FROM candles WHERE " + " AND ".join(where)
        )
        if limit is None:
            query = select + " ORDER BY time ASC"
        else:
            # Take the newest `limit` rows, then flip them back to ascending.
            query = (
                "SELECT * FROM ("
                + select
                + " ORDER BY time DESC LIMIT ?) ORDER BY time ASC"
            )
            params.append(limit)

        with self._get_connection() as conn:
            rows = conn.execute(query, params).fetchall()

        candles = [
            Candle(
                time=row["time"],
                open=row["open"],
                high=row["high"],
                low=row["low"],
                close=row["close"],
                volume=row["volume"],
                closed=True,
            )
            for row in rows
        ]

        # Gaps are computed on the rows actually returned, so when `limit`
        # truncates a window that has a `start`, the cut-off part shows up as
        # a leading gap.
        missing_ranges = self._detect_gaps(candles, start, end, timeframe)
        return candles, missing_ranges

    def _detect_gaps(
        self,
        candles: List["Candle"],
        start: Optional[int],
        end: Optional[int],
        timeframe: str,
    ) -> List[Tuple[int, int]]:
        """
        Detect missing time ranges in the candle data.

        Args:
            candles: Candles found for the request (any order)
            start: Requested start timestamp, or None
            end: Requested end timestamp, or None
            timeframe: Candle timeframe, used to derive the candle interval

        Returns:
            List of (gap_start, gap_end) tuples. With no candles at all the
            whole (start, end) window is returned when both bounds are known,
            otherwise an empty list.
        """
        if not candles:
            if start is not None and end is not None:
                return [(start, end)]
            return []

        # Function-scope imports, as in the original code.
        from ..candles.assembler import timeframe_to_seconds
        import time as time_module

        try:
            interval = timeframe_to_seconds(timeframe)
        except ValueError:
            interval = 300  # Default to 5m

        # The freshness check only applies to limit-only requests, so the
        # clock is only read in that case.
        now = int(time_module.time()) if start is None and end is None else None
        return self._find_gaps([c.time for c in candles], start, end, interval, now)

    @staticmethod
    def _find_gaps(
        times: List[int],
        start: Optional[int],
        end: Optional[int],
        interval: int,
        now: Optional[int] = None,
    ) -> List[Tuple[int, int]]:
        """
        Compute missing (gap_start, gap_end) ranges from candle timestamps.

        Args:
            times: Candle timestamps (any order)
            start: Requested start timestamp, or None
            end: Requested end timestamp, or None
            interval: Candle interval, in the same unit as the timestamps
            now: Current timestamp; when given and both ``start`` and ``end``
                are None, data older than two intervals is reported as a gap
                from the candle after the newest one up to ``now``

        Returns:
            Gaps in order: leading gap, gaps between candles, trailing gap,
            freshness gap. A gap is only reported when gap_start <= gap_end.
        """
        ordered = sorted(times)
        if not ordered:
            return []

        gaps: List[Tuple[int, int]] = []

        def add_gap(gap_start: int, gap_end: int) -> None:
            # A start that is not on a candle boundary (or misaligned
            # candles) would otherwise produce an inverted range such as
            # (1000, 900), which describes no missing candle.
            if gap_start <= gap_end:
                gaps.append((gap_start, gap_end))

        first, last = ordered[0], ordered[-1]

        # Gap before the first candle
        if start is not None and first > start:
            add_gap(start, first - interval)

        # Gaps between consecutive candles (including a single missing candle)
        for previous, current in zip(ordered, ordered[1:]):
            expected = previous + interval
            if current > expected:
                add_gap(expected, current - interval)

        # Gap after the last candle
        if end is not None and last + interval <= end:
            gaps.append((last + interval, end))

        # FRESHNESS CHECK: for limit-only requests (no start/end) the newest
        # candle must be recent; otherwise flag a gap so fresh data is fetched.
        if now is not None and start is None and end is None:
            staleness_threshold = interval * 2
            if now - last > staleness_threshold:
                gaps.append((last + interval, now))

        return gaps

    def put(
        self, exchange: str, symbol: str, timeframe: str, candles: List["Candle"]
    ) -> None:
        """
        Store candles in database.

        Uses INSERT OR REPLACE to handle duplicates. Does nothing when
        ``candles`` is empty.

        Args:
            exchange: Exchange name
            symbol: Trading pair symbol
            timeframe: Candle timeframe
            candles: List of candles to store
        """
        if not candles:
            return

        key = self._key(exchange, symbol, timeframe)
        rows = [
            key + (c.time, c.open, c.high, c.low, c.close, c.volume)
            for c in candles
        ]
        with self._get_connection() as conn:
            conn.executemany(
                """
                INSERT OR REPLACE INTO candles
                (exchange, symbol, timeframe, time, open, high, low, close, volume)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                rows,
            )
            conn.commit()
        logger.debug(
            "Stored %d candles for %s:%s:%s", len(candles), exchange, symbol, timeframe
        )

    def get_time_range(
        self, exchange: str, symbol: str, timeframe: str
    ) -> Optional[Tuple[int, int]]:
        """
        Get the time range of cached data for a symbol.

        Returns:
            Tuple of (earliest_time, latest_time) or None if no data
        """
        with self._get_connection() as conn:
            row = conn.execute(
                """
                SELECT MIN(time) as min_time, MAX(time) as max_time
                FROM candles
                WHERE exchange = ? AND symbol = ? AND timeframe = ?
                """,
                self._key(exchange, symbol, timeframe),
            ).fetchone()

        # The aggregate query yields NULLs when nothing matches.
        if row and row["min_time"] is not None:
            return (row["min_time"], row["max_time"])
        return None

    def delete(
        self,
        exchange: Optional[str] = None,
        symbol: Optional[str] = None,
        timeframe: Optional[str] = None,
        before: Optional[int] = None,
    ) -> None:
        """
        Delete candles from database.

        Filters are combined with AND. A filter that is None or an empty
        string is ignored; when no filter applies at all, EVERY candle is
        deleted.

        Args:
            exchange: Filter by exchange
            symbol: Filter by symbol
            timeframe: Filter by timeframe
            before: Delete candles older than this timestamp
        """
        conditions = []
        params: list = []
        text_filters = (
            ("exchange", exchange.lower() if exchange else None),
            ("symbol", symbol.upper() if symbol else None),
            ("timeframe", timeframe.lower() if timeframe else None),
        )
        for column, value in text_filters:
            if value:
                conditions.append(f"{column} = ?")
                params.append(value)
        if before is not None:
            conditions.append("time < ?")
            params.append(before)

        with self._get_connection() as conn:
            if conditions:
                query = f"DELETE FROM candles WHERE {' AND '.join(conditions)}"
                cursor = conn.execute(query, params)
                conn.commit()
                logger.info("Deleted %d candles", cursor.rowcount)
            else:
                # Delete all
                conn.execute("DELETE FROM candles")
                conn.commit()
                logger.warning("Deleted all candles from database")

    def stats(self) -> dict:
        """
        Get database statistics.

        Returns:
            Dict with ``total_candles`` (row count), ``num_entries`` (number
            of distinct exchange/symbol/timeframe groups) and ``entries`` (one
            dict per group with its count, min_time and max_time).
        """
        with self._get_connection() as conn:
            total = conn.execute(
                "SELECT COUNT(*) as count FROM candles"
            ).fetchone()["count"]
            grouped = conn.execute(
                """
                SELECT exchange, symbol, timeframe, COUNT(*) as count,
                       MIN(time) as min_time, MAX(time) as max_time
                FROM candles
                GROUP BY exchange, symbol, timeframe
                """
            ).fetchall()

        entries = [dict(row) for row in grouped]
        return {
            "total_candles": total,
            "num_entries": len(entries),
            "entries": entries,
        }