From 1ff21b56dde6fe940c0f490c155fd813fa3dbd0b Mon Sep 17 00:00:00 2001 From: Rob Date: Sun, 15 Sep 2024 14:05:08 -0300 Subject: [PATCH] Made a lot of changes to DataCache indicator data is not being saved to the database. --- .../DataCache.py | 0 .../test_DataCache.py | 0 src/BrighterTrades.py | 18 +- src/DataCache_v2.py | 2 +- src/DataCache_v3.py | 1283 +++++++++------- src/Database.py | 18 +- src/ExchangeInterface.py | 84 +- src/Users.py | 73 +- src/indicators.py | 220 +-- tests/test_DataCache.py | 1363 ++++++++++++----- 10 files changed, 1911 insertions(+), 1150 deletions(-) rename {src/archived_code => archived_code}/DataCache.py (100%) rename {src/archived_code => archived_code}/test_DataCache.py (100%) diff --git a/src/archived_code/DataCache.py b/archived_code/DataCache.py similarity index 100% rename from src/archived_code/DataCache.py rename to archived_code/DataCache.py diff --git a/src/archived_code/test_DataCache.py b/archived_code/test_DataCache.py similarity index 100% rename from src/archived_code/test_DataCache.py rename to archived_code/test_DataCache.py diff --git a/src/BrighterTrades.py b/src/BrighterTrades.py index c83e6a0..1a05226 100644 --- a/src/BrighterTrades.py +++ b/src/BrighterTrades.py @@ -14,11 +14,14 @@ from trade import Trades class BrighterTrades: def __init__(self): - # Object that interacts and maintains exchange_interface and account data - self.exchanges = ExchangeInterface() - # Object that interacts with the persistent data. 
- self.data = DataCache(self.exchanges) + self.data = DataCache() + + # Object that interacts and maintains exchange_interface and account data + self.exchanges = ExchangeInterface(self.data) + + # Set the exchange for datacache to use + self.data.set_exchange(self.exchanges) # Configuration for the app self.config = Configuration() @@ -34,7 +37,7 @@ class BrighterTrades: config=self.config) # Object that interacts with and maintains data from available indicators - self.indicators = Indicators(self.candles, self.users) + self.indicators = Indicators(self.candles, self.users, self.data) # Object that maintains the trades data self.trades = Trades(self.users) @@ -186,8 +189,8 @@ class BrighterTrades: :return: bool - True on success. """ active_exchanges = self.users.get_exchanges(user_name, category='active_exchanges') - success = False + success = False for exchange in active_exchanges: keys = self.users.get_api_keys(user_name, exchange) result = self.connect_or_config_exchange(user_name=user_name, @@ -391,7 +394,8 @@ class BrighterTrades: } try: - if self.exchanges.exchange_data.query("user == @user_name and name == @exchange_name").empty: + if self.data.get_cache_item().get_cache('exchange_data').query([('user', user_name), + ('name', exchange_name)]).empty: # Exchange is not connected, try to connect success = self.exchanges.connect_exchange(exchange_name=exchange_name, user_name=user_name, api_keys=api_keys) diff --git a/src/DataCache_v2.py b/src/DataCache_v2.py index ed996bb..4524c89 100644 --- a/src/DataCache_v2.py +++ b/src/DataCache_v2.py @@ -180,7 +180,7 @@ class DataCache: params.append(additional_filter[1]) # Execute the SQL query to remove the row from the database - self.db.execute_sql(sql, tuple(params)) + self.db.execute_sql(sql, params) logger.info( f"Row removed from database: table={table}, filter={filter_vals}," f" additional_filter={additional_filter}") diff --git a/src/DataCache_v3.py b/src/DataCache_v3.py index fabf652..7145179 100644 --- 
a/src/DataCache_v3.py +++ b/src/DataCache_v3.py @@ -1,9 +1,10 @@ -import io +import copy import pickle -from abc import ABC, abstractmethod +import time import logging import datetime as dt -from typing import Any, Tuple +from collections import deque +from typing import Any, Tuple, List, Optional import pandas as pd import numpy as np @@ -84,359 +85,537 @@ def estimate_record_count(start_time, end_time, timeframe: str) -> int: raise ValueError(f"Invalid timeframe: {timeframe}") -# Cache Interface -class Cache(ABC): - """ - Abstract base class that defines the interface for a cache. - """ +class CacheEntryMetadata: + """Stores metadata for a cache entry (row).""" - @abstractmethod - def set_item(self, key: str, data: any, expire_delta: dt.timedelta = None): - pass + def __init__(self, expiration_time: Optional[int] = None): + self.creation_time = time.time() + self.expiration_time = expiration_time + self.expiration_timestamp = self.creation_time + expiration_time if expiration_time else None - @abstractmethod - def get_item(self, key: str) -> any: - pass - - @abstractmethod - def remove_item(self, key: str): - pass - - @abstractmethod - def clean_expired_items(self): - pass - - @abstractmethod - def get_all_items(self) -> pd.DataFrame: - pass + def is_expired(self) -> bool: + """Check if the entry is expired.""" + if self.expiration_time is None: + return False # No expiration set, entry never expires + if self.expiration_time == 0: + return True # Expire immediately + return time.time() > self.expiration_timestamp -# In-Memory Cache Implementation -class InMemoryCache(Cache): - """ - In-memory storage with a size limit and customizable eviction policies. +class CacheEntry: + """Stores data and its expiration metadata.""" - Attributes: - cache (pd.DataFrame): The in-memory storage for cache items. - limit (int): The maximum number of items allowed in the cache. None means no limit. - eviction_policy (str): The policy used when the cache reaches its limit. 
Options: 'evict', 'deny'. + def __init__(self, data: Any, expiration_time: Optional[int] = None): + self.data = data + self.metadata = CacheEntryMetadata(expiration_time) - Methods: - set_item(key: str, data: any, expire_delta: dt.timedelta = None): Adds an item to the cache. - get_item(key: str) -> any: Retrieves an item from the cache by its key. - get_all_items() -> pd.DataFrame: Returns all items currently stored in the cache. - remove_item(key: str): Removes an item from the cache by its key. - clean_expired_items(): Cleans up expired items from the cache. - Usage Example: - # Create a cache with a limit of 2 items and 'evict' policy - cached_users = InMemoryCache(limit=2, eviction_policy='evict') +class RowBasedCache: + """Cache for storing individual rows, where each entry has a unique key.""" - # Set some items in the cache. - cached_users.set_item("user_bob", "{password:'BobPass'}", expire_delta=dt.timedelta(seconds=10)) - cached_users.set_item("user_alice", "{password:'AlicePass'}", expire_delta=dt.timedelta(seconds=20)) - - # Retrieve an item - retrieved_item = cached_users.get_item('user_bob') - print(f"Retrieved: {retrieved_item}") # Output: Retrieved: {password:'BobPass'} - - # Add another item, causing the oldest item to be evicted - cached_users.set_item("user_billy", "{password:'BillyPass'}") - - # Attempt to retrieve the evicted item - evicted_item = cached_users.get_item('user_bob') - print(f"Evicted Item: {evicted_item}") # Output: Evicted Item: None - - # Retrieve the current items in the cache - all_items = cached_users.get_all_items() - print(all_items) - - # Clean expired items - cached_users.clean_expired_items() - """ - - def __init__(self, limit: int = None, eviction_policy: str = 'evict'): - """ - Initializes the InMemoryCache with an empty DataFrame, an optional size limit, and a specified eviction policy. - - :param limit: The maximum number of items allowed in the cache. If None, the cache size is unlimited. 
- :param eviction_policy: The policy used when the cache reaches its limit. Options: 'evict', 'deny'. - """ - self.cache = pd.DataFrame(columns=['key', 'data', 'creation_time', 'expire_delta']) - self.limit = limit + def __init__(self, default_expiration: Optional[int] = None, size_limit: Optional[int] = None, + eviction_policy: str = "evict", purge_threshold: int = 10): + self.cache = {} + self.default_expiration = default_expiration + self.size_limit = size_limit self.eviction_policy = eviction_policy + self.access_order = deque() # Tracks the order of access for eviction + self.access_counter = 0 # Counter to track accesses + self.purge_threshold = purge_threshold # Define how often to trigger purge - def set_item(self, key: str, data: any, expire_delta: dt.timedelta = None): + def add_entry(self, key: str, data: Any, expiration_time: Optional[int] = None): + """Add an entry to the cache.""" + self._check_purge() # Check if purge is needed + if self.size_limit is not None and len(self.cache) >= self.size_limit: + if self.eviction_policy == "evict": + self.evict() + elif self.eviction_policy == "deny": + return "Cache limit reached. Entry not added." 
+ + # If key already exists and is a DataFrame, append the new data + if key in self.cache and isinstance(self.cache[key].data, pd.DataFrame): + # If the key already exists, append the new data to the existing DataFrame + if isinstance(self.cache[key].data, pd.DataFrame): + self.cache[key].data = pd.concat([self.cache[key].data, data], ignore_index=True) + else: + self.cache[key].data = data # For non-DataFrame types, just replace the data + else: + # Otherwise, replace the entry with the new data + expiration_time = expiration_time or self.default_expiration + self.cache[key] = CacheEntry(data, expiration_time) + + # Update access order + if key not in self.access_order: + self.access_order.append(key) + else: + self.access_order.remove(key) # Move the key to the end + self.access_order.append(key) + + def get_entry(self, key: str) -> Any: + """Retrieve an entry by key, ensuring expired entries are ignored.""" + self._check_purge() # Check if purge is needed + if key in self.cache: + if not self.cache[key].metadata.is_expired(): + self.access_order.remove(key) + self.access_order.append(key) # Update access order for eviction + return self.cache[key].data + else: + del self.cache[key] # Remove expired entry + return None + + def query(self, conditions: List[Tuple[str, Any]]) -> pd.DataFrame: + """Query cache entries by conditions, ignoring expired entries.""" + self._check_purge() # Check if purge is needed + key_value = next((value for key, value in conditions if key == 'key'), None) + if key_value is None or key_value not in self.cache: + return pd.DataFrame() # Return an empty DataFrame if key is not found + + entry = self.cache[key_value] + if entry.metadata.is_expired(): + del self.cache[key_value] # Remove expired entry + return pd.DataFrame() # Return an empty DataFrame if the entry has expired + + data = entry.data + + # If the data is a DataFrame, apply the conditions using pandas .query() + if isinstance(data, pd.DataFrame): + # Construct the query 
string and prepare local variables for the query + query_conditions = ' and '.join([f'`{col}` == @val_{col}' for col, _ in conditions if col != 'key']) + query_vars = {f'val_{col}': val for col, val in conditions if col != 'key'} + + # Use pandas .query() with local_dict to pass the variables + return data.query(query_conditions, local_dict=query_vars) if query_conditions else data + + return pd.DataFrame([data]) # Return the non-DataFrame data as a single row DataFrame if possible + + def is_attr_taken(self, column: str, value: Any) -> bool: + """Check if a column contains the specified value in the Row-Based Cache.""" + self._check_purge() # Check if purge is needed + for key, entry in self.cache.items(): + if isinstance(entry.data, pd.DataFrame): # Only apply to DataFrames + if column in entry.data.columns: + # Use DataFrame.query to check if the column contains the value + query_result = entry.data.query(f'`{column}` == @value', local_dict={'value': value}) + if not query_result.empty: + return True # Return True if the value exists in any DataFrame row + return False # Return False if no match is found + + def evict(self): + """Evict the oldest accessed entry.""" + oldest_key = self.access_order.popleft() + del self.cache[oldest_key] + + def _check_purge(self): + """Increment the access counter and trigger purge if threshold is reached.""" + self.access_counter += 1 + if self.access_counter >= self.purge_threshold: + self._purge_expired() + self.access_counter = 0 # Reset the counter after purging + + def _purge_expired(self): + """Remove expired entries from the cache.""" + expired_keys = [key for key, entry in self.cache.items() if entry.metadata.is_expired()] + for key in expired_keys: + del self.cache[key] + self.access_order.remove(key) + + def get_all_items(self) -> dict[str, Any]: + """Retrieve all non-expired items in the cache.""" + self._check_purge() # Ensure expired entries are purged as needed + return {key: entry.data for key, entry in 
self.cache.items() if not entry.metadata.is_expired()} + + def remove_item(self, conditions: List[Tuple[str, Any]]) -> bool: + """Remove an item from the cache using key-value conditions. + In row cache, only 'key' is used to identify the entry. """ - Adds an item to the cache, optionally specifying an expiration duration. + # Find the value of 'key' from the conditions + key_value = next((value for key, value in conditions if key == 'key'), None) + if key_value is None or key_value not in self.cache: + return False # Key not found, so nothing to remove - :param key: The key associated with the cache item. - :param data: The data to be cached. - :param expire_delta: Optional duration after which the cache will expire. + # If no additional conditions are provided, remove the entire entry by key + if len(conditions) == 1: + del self.cache[key_value] + self.access_order.remove(key_value) + return True + + entry = self.cache[key_value] + # If the data is a DataFrame, apply additional filtering + if isinstance(entry.data, pd.DataFrame): + # Construct the query string and prepare local variables for the query + query_conditions = ' and '.join([f'`{col}` == @val_{col}' for col, _ in conditions if col != 'key']) + query_vars = {f'val_{col}': val for col, val in conditions if col != 'key'} + + # Apply the query to the DataFrame, removing matching rows + remaining_data = entry.data.query(f'not ({query_conditions})', local_dict=query_vars) + if remaining_data.empty: + # If all rows are removed, delete the entire entry + del self.cache[key_value] + self.access_order.remove(key_value) + else: + # Update the entry with the remaining rows + entry.data = remaining_data + else: + # If the data is not a DataFrame, remove the entire entry if the 'key' matches + del self.cache[key_value] + self.access_order.remove(key_value) + return True # Successfully removed the item + + +class TableBasedCache: + """Cache for storing entire tables with expiration applied to rows.""" + + def 
__init__(self, default_expiration: Optional[int] = None, size_limit: Optional[int] = None, + eviction_policy: str = "evict"): + self.cache = pd.DataFrame() # The DataFrame where both data and metadata are stored + self.default_expiration = default_expiration + self.size_limit = size_limit + self.eviction_policy = eviction_policy + self.access_order = deque() # Tracks the order of access for eviction + + def _check_size_limit(self): + """Check and enforce the size limit.""" + if self.size_limit and len(self.cache) > self.size_limit: + if self.eviction_policy == "evict": + excess = len(self.cache) - self.size_limit + self.evict(excess) # Evict excess rows + elif self.eviction_policy == "deny": + return False # Don't allow more rows to be added + return True + + def add_table(self, df: pd.DataFrame, expiration_time: Optional[int] = None, overwrite: Optional[str] = None, + key: Optional[str] = None): """ - if self.limit is not None and len(self.cache) >= self.limit: - if self.eviction_policy == 'evict': - # Evict the oldest item (based on creation time) - self.cache = self.cache.sort_values(by='creation_time').iloc[1:] - elif self.eviction_policy == 'deny': - # Deny adding the new item if the limit is reached - print(f"Cache limit reached. Item with key '{key}' was not added.") - return + Adds a DataFrame to the cache, attaching metadata to each row. + Optionally overwrites rows based on a column value. - creation_time = dt.datetime.now(dt.timezone.utc) - new_item = pd.DataFrame({ - 'key': [key], - 'data': [data], - 'creation_time': [creation_time], - 'expire_delta': [expire_delta] - }) - - # Remove any existing item with the same key - self.cache = self.cache[self.cache['key'] != key] - - # Add the new item - self.cache = pd.concat([self.cache, new_item], ignore_index=True) - - def get_item(self, key: str) -> any: + :param overwrite: Column name to use for identifying rows to overwrite. + :param df: The DataFrame to add. 
+ :param expiration_time: Optional expiration time for the rows. + :param key: """ - Retrieves an item from the cache by its key. + expiration_time = expiration_time or self.default_expiration + if expiration_time is not None: + metadata = [CacheEntryMetadata(expiration_time) for _ in range(len(df))] + else: + metadata = [CacheEntryMetadata() for _ in range(len(df))] - :param key: The key associated with the cache item. - :return Any: The cached data, or None if the key does not exist or the item is expired. - """ - item = self.cache[self.cache['key'] == key] - if item.empty: - return None + # Add metadata to each row of the DataFrame + df_with_metadata = df.copy() + df_with_metadata['metadata'] = metadata - current_time = dt.datetime.now(dt.timezone.utc) - creation_time = item['creation_time'].iloc[0] - expire_delta = item['expire_delta'].iloc[0] + # If a key is provided, add a 'key' column to the DataFrame + if key is not None: + df_with_metadata['tbl_key'] = key - if pd.notna(expire_delta) and current_time > creation_time + expire_delta: - self.remove_item(key) # Remove expired item - return None + if getattr(self, 'cache', None) is None: + # If the cache is empty, initialize it with the new DataFrame + self.cache = df_with_metadata + else: + # Append the new rows + self.cache = pd.concat([self.cache, df_with_metadata], ignore_index=True) - return item['data'].iloc[0] + if overwrite: + # Drop duplicates based on the overwrite column, keeping the last occurrence (new data) + self.cache = self.cache.drop_duplicates(subset=overwrite, keep='last') - def get_all_items(self) -> pd.DataFrame: - """ - Returns all items currently stored in the cache. + # Enforce size limit + if not self._check_size_limit(): + return "Cache limit reached. Table not added." - :return pd.DataFrame: A DataFrame containing all cached items. 
- """ - return self.cache + def _purge_expired(self): + """Remove expired rows from the cache.""" + try: + # Filter rows where metadata is not expired, keep columns even if no valid rows + is_valid = self.cache['metadata'].apply(lambda meta: not meta.is_expired()) - def remove_item(self, key: str): - """ - Removes an item from the cache by its key. + # Filter DataFrame, ensuring columns are always kept + self.cache = self.cache.loc[is_valid].reindex(self.cache.columns, axis=1).reset_index(drop=True) + except KeyError: + raise KeyError("The 'metadata' column is missing from the cache.") + except AttributeError as e: + raise AttributeError(f"Error in metadata processing: {e}") - :param key: The key associated with the cache item to be removed. - """ - self.cache = self.cache[self.cache['key'] != key] + def query(self, conditions: List[Tuple[str, Any]]) -> pd.DataFrame: + """Query rows based on conditions and return valid (non-expired) entries.""" + self._purge_expired() # Remove expired rows before querying - def clean_expired_items(self): - """ - Cleans up expired items from the cache. Items with no expiration time (expire_delta is None) are not removed. 
- """ - current_time = dt.datetime.now(dt.timezone.utc) + # Start with the entire cache + result = self.cache.copy() - # Mask for non-expiring items (where expire_delta is None) - non_expiring_mask = self.cache['expire_delta'].isna() + # Replace any query for 'key' with 'tbl_key' since that's what we are using in the table-based cache + conditions = [(('tbl_key' if col == 'key' else col), val) for col, val in conditions] - # Mask for items that have not yet expired - not_expired_mask = ( - self.cache['creation_time'] + self.cache['expire_delta'].fillna(pd.Timedelta(0)) > current_time) + # Apply conditions using pandas .query() + if not result.empty: + query_conditions = ' and '.join([f'`{col}` == @val_{col}' for col, _ in conditions]) + query_vars = {f'val_{col}': val for col, val in conditions} - # Combine the masks - mask_to_keep = non_expiring_mask | not_expired_mask + # Use pandas .query() with local_dict to pass the variables + result = result.query(query_conditions, local_dict=query_vars) if query_conditions else result - # Apply the mask to filter the cache - self.cache = self.cache[mask_to_keep].reset_index(drop=True) + # Remove the metadata and tbl_key columns for the result + return result.drop(columns=['metadata', 'tbl_key'], errors='ignore') + def is_attr_taken(self, column: str, value: Any) -> bool: + """Check if a column contains the specified value in the Table-Based Cache.""" + self._purge_expired() # Ensure expired entries are removed + if column not in self.cache.columns: + return False # Column does not exist -class DataCacheBase: - """ - Manages multiple caches, delegating cache operations to the appropriate cache instance. 
+ # Use DataFrame.query to check if the column contains the value + query_result = self.cache.query(f'`{column}` == @value', local_dict={'value': value}) + return not query_result.empty # Return True if the value exists, otherwise False - Attributes: - caches (dict[str, 'Cache']): A dictionary mapping cache names to cache instances. - - Methods: - create_cache(cache_name: str, cache_type: type = 'InMemoryCache', **kwargs): Creates a new cache with the - specified name and type. - set_cache_item(key: str, data: any, expire_delta: dt.timedelta = None, do_not_overwrite: bool = False, - cache_name: str = 'default_cache', limit: int = None, eviction_policy: str = 'evict'): - Sets an item in the specified cache, creating the cache if it doesn't exist. - cache_exists(cache_name: str, key: str) -> bool: Checks if a specific key exists in the specified cache. - get_cache_item(key: str, cache_name: str = 'default_cache') -> any: Retrieves an item from the specified cache. - get_all_cache_items(cache_name: str) -> pd.DataFrame: Returns all items from the specified cache. - remove_cache_item(cache_name: str, key: str): Removes an item from the specified cache. - clean_expired_items(cache_name: str = None): Cleans up expired items from the specified cache or all caches. - - Usage Example: - # Create a DataCacheBase instance - cache_manager = DataCacheBase() - - # Set some items in the default cache. The cache is created automatically with default settings. - cache_manager.set_cache_item('key1', 'data1', expire_delta=dt.timedelta(seconds=10)) - cache_manager.set_cache_item('key2', 'data2', expire_delta=dt.timedelta(seconds=20)) - - # Check if a key exists in the default cache. - exists = cache_manager.cache_exists('default_cache', 'key1') - print(f"Key1 exists: {exists}") # Output: Key1 exists: True - - # Add another item, causing the oldest item to be evicted. 
- cache_manager.set_cache_item('key3', 'data3', cache_name='default_cache') - - # Retrieve an item from the default cache. - item = cache_manager.get_cache_item('key2') - print(f"Retrieved Item: {item}") # Output: Retrieved Item: data2 - - # Attempt to retrieve the evicted item. - evicted_item = cache_manager.get_cache_item('key1') - print(f"Evicted Item: {evicted_item}") # Output: Evicted Item: None - - # Create a named cache with a limit and custom eviction policy. - cache_manager.set_cache_item('keyA', 'dataA', cache_name='my_cache', limit=3, eviction_policy='deny') - - # Set items in the named cache. - cache_manager.set_cache_item('keyB', 'dataB', cache_name='my_cache') - cache_manager.set_cache_item('keyC', 'dataC', cache_name='my_cache') - - # Retrieve all items in the named cache. - all_items = cache_manager.get_all_cache_items('my_cache') - print(all_items) - - # Remove an item from the named cache - cache_manager.remove_cache_item('my_cache', 'keyB') - - # Clean expired items in the named cache - cache_manager.clean_expired_items('my_cache') - - # Clean expired items in all caches - cache_manager.clean_expired_items() - """ - - def __init__(self): - self.caches: dict[str, 'Cache'] = {} - - def create_cache(self, cache_name: str, cache_type: type = InMemoryCache, **kwargs): - """ - Creates a new cache with the specified name and type. - - :param cache_name: The name of the cache. - :param cache_type: Optional type of cache to create (default is InMemoryCache). - :param kwargs: Additional arguments to pass to the cache constructor. - """ - if cache_name in self.caches: - raise ValueError(f"Cache with name '{cache_name}' already exists.") - self.caches[cache_name] = cache_type(**kwargs) - - def set_cache_item(self, key: str, data: any, expire_delta: dt.timedelta = None, do_not_overwrite: bool = False, - cache_name: str = 'default_cache', limit: int = None, eviction_policy: str = 'evict'): - """ - Sets or updates an entry in the specified cache. 
If the key already exists, the existing entry - is replaced unless `do_not_overwrite` is True. Automatically creates the cache if it doesn't exist. - - :param key: The key associated with the cache item. - :param data: The data to be cached. - :param expire_delta: The optional duration after which the cache will expire. - :param do_not_overwrite: If True, the existing entry will not be overwritten. Default is False. - :param cache_name: The name of the cache to use. Default is 'default_cache'. - :param limit: The maximum number of items allowed in the cache (only used if creating a new cache). - :param eviction_policy: The policy used when the cache reaches its limit (only used if creating a new cache). - """ - - # Automatically create the cache if it doesn't exist - if cache_name not in self.caches: - print(f"Creating Cache '{cache_name}' because it does not exist.") - self.create_cache(cache_name, cache_type=InMemoryCache, limit=limit, eviction_policy=eviction_policy) - - # Check if the key exists and handle `do_not_overwrite` - existing_data = self.get_cache_item(key=key, cache_name=cache_name) - if do_not_overwrite and existing_data is not None: - print(f"Key '{key}' already exists in cache '{cache_name}' and" - f" `do_not_overwrite` is True. 
Skipping update.") + def evict(self, num_rows: int = 1): + """Evict the oldest accessed rows based on the access order.""" + if len(self.cache) == 0: return - # Set or overwrite the cache item - self._get_cache(cache_name).set_item(key, data, expire_delta) + # Evict the first num_rows rows + self.cache = self.cache.iloc[num_rows:].reset_index(drop=True) - def cache_exists(self, cache_name: str, key: str) -> bool: + def get_all_items(self) -> pd.DataFrame: + """Retrieve all non-expired rows from the table-based cache.""" + self._purge_expired() # Ensure expired rows are removed + return self.cache + + def remove_item(self, conditions: List[Tuple[str, Any]]) -> bool: + """Remove rows from the table-based cache that match the key-value conditions.""" + self._purge_expired() # Ensure expired entries are removed + if self.cache.empty: + return False # Cache is empty + + # Construct the query string and prepare local variables for the query + query_conditions = ' and '.join([f'`{col}` == @val_{col}' for col, _ in conditions]) + query_vars = {f'val_{col}': val for col, val in conditions} + + # Apply the query to find matching rows + remaining_data = self.cache.query(f'not ({query_conditions})', local_dict=query_vars) + if len(remaining_data) == len(self.cache): + return False # No rows matched the conditions, so nothing was removed + + # Update the cache with the remaining data + self.cache = remaining_data + return True # Successfully removed matching rows + + +class CacheManager: + """Manages different cache types (row-based and table-based).""" + + def __init__(self): + self.caches = {} + + import pandas as pd + import datetime as dt + from typing import Optional + + def create_cache(self, name: str, cache_type: str, + size_limit: Optional[int] = None, + eviction_policy: str = 'evict', + default_expiration: Optional[dt.timedelta] = None, + columns: Optional[list] = None) -> TableBasedCache | RowBasedCache: """ - Checks if a specific key exists in the specified cache. 
+ Creates a new cache with the given parameters. - :param cache_name: The name of the cache to check. - :param key: The key to look for in the cache. - :return: True if the key exists in the cache, False otherwise. + :param name: The name of the cache. + :param cache_type: The type of cache ('row' or 'table'). + :param size_limit: Maximum number of items allowed in the cache. + :param eviction_policy: Policy for evicting items when cache limit is reached. + :param default_expiration: A timedelta object representing the expiration time. + :param columns: Optional list of column names to initialize an empty DataFrame for a table-based cache. + :return: The created cache. """ - if cache_name not in self.caches: - return False + # Convert default_expiration timedelta to seconds + expiration_in_seconds = default_expiration.total_seconds() if \ + default_expiration not in [None, 0] else default_expiration - cache_df = self.caches[cache_name].get_all_items() - return key in cache_df['key'].values + # Create cache using expiration_in_seconds + if cache_type == 'row': + self.caches[name] = RowBasedCache(size_limit=size_limit, eviction_policy=eviction_policy, + default_expiration=expiration_in_seconds) + elif cache_type == 'table': + self.caches[name] = TableBasedCache(size_limit=size_limit, eviction_policy=eviction_policy, + default_expiration=expiration_in_seconds) - def _get_cache(self, cache_name: str) -> Cache | None: - """ - Retrieves the cache instance associated with the given cache name. - - :param cache_name: The name of the cache. - :return Cache: The cache instance associated with the cache name. - :raises ValueError: If the cache with the given name does not exist. - """ - if cache_name not in self.caches: - return None - return self.caches[cache_name] - - def get_cache_item(self, key: str, cache_name: str = 'default_cache') -> any: - """ - Retrieves an item from the specified cache. - - :param cache_name: The name of the cache. 
- :param key: The key associated with the cache item. - :return Any: The cached data, or None if the key does not exist or the item is expired. - """ - - cache = self._get_cache(cache_name) - if cache: - return cache.get_item(key) + # Initialize the DataFrame with provided columns if specified + if columns: + self.caches[name].add_table(df=pd.DataFrame(columns=columns)) + logging.info(f"Table-based cache '{name}' initialized with columns: {columns}") else: - return None + raise ValueError(f"Unsupported cache type: {cache_type}") - def get_all_cache_items(self, cache_name: str) -> pd.DataFrame: - """ - Returns all items from the specified cache. + logging.info(f"Cache '{name}' of type '{cache_type}' created with expiration: {default_expiration}") + return self.caches.get(name) - :param cache_name: The name of the cache. - :return pd.DataFrame: A DataFrame containing all cached items from the specified cache. - """ - return self._get_cache(cache_name).get_all_items() - - def remove_cache_item(self, cache_name: str, key: str): - """ - Removes an item from the specified cache. - - :param cache_name: The name of the cache. - :param key: The key associated with the cache item to be removed. - """ - self._get_cache(cache_name).remove_item(key) - - def clean_expired_items(self, cache_name: str = None): - """ - Cleans up expired items from the specified cache or all caches if no cache name is provided. - - :param cache_name: The name of the cache to clean, or None to clean all caches. 
- """ - if cache_name: - self._get_cache(cache_name).clean_expired_items() + def get_cache(self, name: str) -> RowBasedCache | TableBasedCache: + """Retrieve a cache by name.""" + if name in self.caches: + return self.caches[name] else: - for cache in self.caches.values(): - cache.clean_expired_items() + raise KeyError(f"Cache: {name}, does not exist.") + + def get_rows_from_cache(self, cache_name: str, filter_vals: list[tuple[str, Any]]) -> pd.DataFrame | None: + """ + Retrieves rows from the cache if available; + + :param cache_name: The key used to identify the cache. + :param filter_vals: A list of tuples, each containing a column name and the value(s) to filter by. + :return: A DataFrame containing the requested rows, or None if no matching rows are found. + :raises ValueError: If the cache is not a DataFrame or does not contain DataFrames in the 'data' column. + """ + # Check if the cache exists + if cache_name in self.caches: + cache = self.get_cache(cache_name) + + # Ensure the cache contains DataFrames (required for querying) + if isinstance(cache, (TableBasedCache, RowBasedCache)): + # Perform the query on the cache using filter_vals + filtered_cache = cache.query(filter_vals) # Pass the list of filters + + # If data is found in the cache, return it + if not filtered_cache.empty: + return filtered_cache + else: + raise ValueError(f"Cache '{cache_name}' does not contain DataFrames.") + + # No result return an empty Dataframe + return pd.DataFrame() + + def fetch_cache_item(self, item_name: str, cache_name: str, filter_vals: tuple[str, any]) -> any: + """ + Retrieves a specific item from the cache. + + :param item_name: The name of the column to retrieve. + :param cache_name: The name used to identify the cache (also the name of the database table). + :param filter_vals: A tuple containing the column name and the value to filter by. + :return: The value of the requested item. 
+ :raises ValueError: If the item is not found in either the cache, + or if the column does not exist. + """ + # Fetch the relevant rows from the cache or database + rows = self.get_rows_from_cache(cache_name=cache_name, filter_vals=[filter_vals]) + if rows is not None and not rows.empty: + if item_name not in rows.columns: + raise ValueError(f"Column '{item_name}' does not exist in the cache '{cache_name}'.") + # Return the specific item from the first matching row. + return rows.iloc[0][item_name] + + # No item found in the cache that satisfied the query. + return None + + def insert_row_into_cache(self, cache_name: str, columns: tuple, values: tuple, key: str = None) -> None: + """ + Inserts a single row into the specified cache. + + :param cache_name: The name of the cache where the row should be inserted. + :param columns: A tuple of column names corresponding to the values. + :param values: A tuple of values to insert into the specified columns. + :param key: Optional key for the cache item. + """ + # Create a DataFrame for the new row + new_row_df = pd.DataFrame([values], columns=list(columns)) + + # Determine if the cache is row-based or table-based, and insert accordingly + cache = self.get_cache(cache_name) + + if isinstance(cache, RowBasedCache): + if key is None: + raise ValueError('A key must be provided for row based cache.') + # For row-based cache, insert the new row as a new cache entry using the key + cache.add_entry(key=key, data=new_row_df) + elif isinstance(cache, TableBasedCache): + # For table-based cache, append the new row to the existing DataFrame + cache.add_table(df=new_row_df) + else: + raise ValueError(f"Unknown cache type for {cache_name}") + + def insert_df_into_cache(self, df: pd.DataFrame, cache_name: str) -> None: + """ + Inserts data from a DataFrame into the specified cache. + + :param df: The DataFrame containing the data to insert. + :param cache_name: The name of the cache where the data should be inserted. 
+ """ + cache = self.get_cache(cache_name) + + if isinstance(cache, RowBasedCache): + # For row-based cache, insert each row of the DataFrame individually using the first column as the key + for idx, row in df.iterrows(): + key = str(row[0]) # Assuming the first column is the unique key for each row + cache.add_entry(key=key, data=row.to_frame().T) # Convert row back to DataFrame for insertion + elif isinstance(cache, TableBasedCache): + # For table-based cache, insert the entire DataFrame + cache.add_table(df=df) + else: + raise ValueError(f"Unknown cache type for {cache_name}") + + def remove_row_from_cache(self, cache_name: str, filter_vals: List[tuple[str, Any]]) -> None: + """ + Removes rows from the cache based on multiple filter criteria. + + This method is specifically designed for caches stored as DataFrames. + + :param cache_name: The name of the cache (or table) from which to remove rows. + :param filter_vals: A list of tuples, each containing a column name and the value to filter by. + :raises ValueError: If the cache is not a DataFrame or if no valid cache is found. + """ + # Ensure filter_vals is a list of tuples + if not isinstance(filter_vals, list) or not all(isinstance(item, tuple) for item in filter_vals): + raise ValueError("filter_vals must be a list of tuples (column, value)") + + cache = self.get_cache(cache_name) + if cache is None: + raise ValueError(f"Cache '{cache_name}' not found.") + + # Call the cache system to remove the filtered rows + cache.remove_item(filter_vals) + + def modify_cache_item(self, cache_name: str, filter_vals: List[Tuple[str, any]], field_name: str, + new_data: any) -> None: + """ + Modifies a specific field in a row within the cache. + + :param cache_name: The name used to identify the cache. + :param filter_vals: A list of tuples containing column names and values to filter by. + :param field_name: The field to be updated. + :param new_data: The new data to be set. 
+ :raises ValueError: If the row is not found in the cache, or if multiple rows are returned. + """ + # Retrieve the row from the cache + rows = self.get_rows_from_cache(cache_name=cache_name, filter_vals=filter_vals) + + if rows is None or rows.empty: + raise ValueError(f"Row not found in cache for {filter_vals}") + + # Check if multiple rows are returned + if len(rows) > 1: + raise ValueError(f"Multiple rows found for {filter_vals}. Please provide a more specific filter.") + + # Update the DataFrame with the new value + rows[field_name] = new_data + + # Get the cache instance + cache = self.get_cache(cache_name) + + # Set the updated row in the cache + if isinstance(cache, RowBasedCache): + # For row-based cache, the 'key' must be in filter_vals + key_value = next((val for key, val in filter_vals if key == 'key'), None) + if key_value is None: + raise ValueError("'key' must be present in filter_vals for row-based caches.") + # Update the cache entry with the modified row + cache.add_entry(key=key_value, data=rows) + elif isinstance(cache, TableBasedCache): + # For table-based cache, use the existing query method to update the correct rows + cache.add_table(rows) + else: + raise ValueError(f"Unsupported cache type for {cache_name}") -class SnapshotDataCache(DataCacheBase): +class SnapshotDataCache(CacheManager): """ - Extends DataCacheBase with snapshot functionality. + Extends DataCacheBase with snapshot functionality for both row-based and table-based caches. Attributes: snapshots (dict): A dictionary to store snapshots, with cache names as keys and snapshot data as values. @@ -445,76 +624,36 @@ class SnapshotDataCache(DataCacheBase): snapshot_cache(cache_name: str): Takes a snapshot of the specified cache and stores it. get_snapshot(cache_name: str): Retrieves the most recent snapshot of the specified cache. list_snapshots() -> dict: Lists all available snapshots along with their timestamps. 
- - Usage Example: - # Create a SnapshotDataCache instance - snapshot_cache_manager = SnapshotDataCache() - - # Create an in-memory cache with a limit of 2 items and 'evict' policy - snapshot_cache_manager.create_cache('my_cache', cache_type=InMemoryCache, limit=2, eviction_policy='evict') - - # Set some items in the cache - snapshot_cache_manager.set_cache_item('my_cache', 'key1', 'data1', expire_delta=dt.timedelta(seconds=10)) - snapshot_cache_manager.set_cache_item('my_cache', 'key2', 'data2', expire_delta=dt.timedelta(seconds=20)) - - # Take a snapshot of the current state of 'my_cache' - snapshot_cache_manager.snapshot_cache('my_cache') - - # Add another item, causing the oldest item to be evicted - snapshot_cache_manager.set_cache_item('my_cache', 'key3', 'data3') - - # Retrieve the most recent snapshot of 'my_cache' - snapshot = snapshot_cache_manager.get_snapshot('my_cache') - print(f"Snapshot Data:\n{snapshot}") - - # List all available snapshots with their timestamps - snapshots_list = snapshot_cache_manager.list_snapshots() - print(f"Snapshots List: {snapshots_list}") - """ + """ def __init__(self): - super().__init__() - self.snapshots = {} # Dictionary to store snapshots + super().__init__() # Call the constructor of CacheManager + self.snapshots = {} # Initialize the snapshots dictionary def snapshot_cache(self, cache_name: str): - """ - Takes a snapshot of the specified cache and stores it for later retrieval. - - :param cache_name: The name of the cache to snapshot. - :raises ValueError: If the cache with the given name does not exist. 
- """ + """Takes a snapshot of the specified cache and stores it with a timestamp.""" if cache_name not in self.caches: - raise ValueError(f"Cache with name '{cache_name}' does not exist.") + raise ValueError(f"Cache '{cache_name}' does not exist.") - # Create a deep copy of the cache to store as a snapshot - snapshot = self.caches[cache_name].get_all_items().copy() + # Deep copy of the cache to ensure that the snapshot is independent + cache_data = copy.deepcopy(self.caches[cache_name]) - # Store the snapshot in the snapshots dictionary with a timestamp - timestamp = dt.datetime.now(dt.timezone.utc).isoformat() - self.snapshots[cache_name] = {'timestamp': timestamp, 'data': snapshot} - - print(f"Snapshot of cache '{cache_name}' taken at {timestamp}.") + # Store the snapshot with a timestamp + self.snapshots[cache_name] = (cache_data, dt.datetime.now()) + print(f"Snapshot taken for cache '{cache_name}' at {self.snapshots[cache_name][1]}.") def get_snapshot(self, cache_name: str): - """ - Retrieves the most recent snapshot of the specified cache. - - :param cache_name: The name of the cache whose snapshot is to be retrieved. - :return: A DataFrame containing the snapshot data, or None if no snapshot exists. - """ + """Retrieves the most recent snapshot of the specified cache.""" if cache_name not in self.snapshots: - print(f"No snapshot available for cache '{cache_name}'.") - return None + raise ValueError(f"No snapshot available for cache '{cache_name}'.") - return self.snapshots[cache_name]['data'] + snapshot, timestamp = self.snapshots[cache_name] + print(f"Returning snapshot of cache '{cache_name}' taken at {timestamp}.") + return snapshot - def list_snapshots(self): - """ - Lists all available snapshots along with their timestamps. - - :return: A dictionary where keys are cache names and values are timestamps of the snapshots. 
- """ - return {cache: info['timestamp'] for cache, info in self.snapshots.items()} + def list_snapshots(self) -> dict: + """Lists all available snapshots along with their timestamps.""" + return {cache_name: timestamp for cache_name, (_, timestamp) in self.snapshots.items()} class DatabaseInteractions(SnapshotDataCache): @@ -523,7 +662,6 @@ class DatabaseInteractions(SnapshotDataCache): Attributes: db (Database): A database connection instance for executing queries. - exchanges (list): A list of exchanges or other relevant entities. TYPECHECKING_ENABLED (bool): A class attribute to toggle type checking. Methods: @@ -577,69 +715,54 @@ class DatabaseInteractions(SnapshotDataCache): """ TYPECHECKING_ENABLED = True - def __init__(self, exchanges): + def __init__(self): super().__init__() self.db = Database() - self.exchanges = exchanges - logger.info("DataCache initialized.") - def get_or_fetch_rows(self, cache_name: str, filter_vals: tuple[str, any]) -> pd.DataFrame | None: + def get_rows_from_datacache(self, cache_name: str, filter_vals: list[tuple[str, Any]]) -> pd.DataFrame | None: """ Retrieves rows from the cache if available; otherwise, queries the database and caches the result. :param cache_name: The key used to identify the cache (also the name of the database table). - :param filter_vals: A tuple containing the column name and the value to filter by. + :param filter_vals: A list of tuples, each containing a column name and the value(s) to filter by. :return: A DataFrame containing the requested rows, or None if no matching rows are found. :raises ValueError: If the cache is not a DataFrame or does not contain DataFrames in the 'data' column. 
""" - # Attempt to retrieve cached data - cache_df = self._get_valid_cache(cache_name) + result = self.get_rows_from_cache(cache_name, filter_vals) + if result.empty: + # Fallback: fetch from the database and cache the result if necessary + return self._fetch_from_database(cache_name, filter_vals) - # If the cache exists and contains data - if cache_df is not None: - combined_data = pd.concat(cache_df['data'].values.tolist(), ignore_index=True) - query_str = f"{filter_vals[0]} == @filter_vals[1]" - matching_rows = combined_data.query(query_str) - - if not matching_rows.empty: - return matching_rows - - # Fallback to database if cache is invalid or no matching rows were found - return self._fetch_from_database(cache_name, filter_vals) - - def _get_valid_cache(self, cache_name: str) -> pd.DataFrame | None: + def _fetch_from_database(self, cache_name: str, filter_vals: List[tuple[str, Any]]) -> pd.DataFrame | None: """ - Retrieves and validates the cache, ensuring it is a non-empty DataFrame containing 'data' column. - - :param cache_name: The key used to identify the cache. - :return: A valid DataFrame if cache is valid and contains data, otherwise None. - """ - if cache_name in self.caches: - cache_df = self.get_all_cache_items(cache_name=cache_name) - - # Return valid DataFrame if it exists and contains the 'data' column - if isinstance(cache_df, pd.DataFrame) and not cache_df.empty and 'data' in cache_df.columns: - return cache_df - - return None - - def _fetch_from_database(self, cache_name: str, filter_vals: tuple[str, any]) -> pd.DataFrame | None: - """ - Helper method to fetch rows from the database and cache the result. + Fetch rows from the database and cache the result. :param cache_name: The name of the table or key used to store/retrieve data. - :param filter_vals: A tuple with the filter column and value. + :param filter_vals: A list of tuples with the filter column and value. :return: A DataFrame with the fetched rows, or None if no data is found. 
""" + # Use db.get_rows_where, assuming it can handle multiple filters rows = self.db.get_rows_where(cache_name, filter_vals) + if rows is not None and not rows.empty: - # Store the fetched rows in the cache for future use - self.set_cache_item(key=filter_vals[1], data=rows, cache_name=cache_name) + # Cache the fetched data (let the caching system handle whether it's row or table-based) + cache = self.get_cache(cache_name) + + if isinstance(cache, RowBasedCache): + # For row-based cache, assume the first filter value is used as the key + key_value = filter_vals[0][1] # Use the value of the first filter as the key + cache.add_entry(key=key_value, data=rows) + else: + # For table-based cache, add the entire DataFrame to the cache + cache.add_table(df=rows) + + # Return the fetched rows return rows + # If no rows are found, return None return None - def fetch_item(self, item_name: str, cache_name: str, filter_vals: tuple[str, any]) -> any: + def fetch_datacache_item(self, item_name: str, cache_name: str, filter_vals: tuple[str, any]) -> any: """ Retrieves a specific item from the cache or database, caching the result if necessary. @@ -651,19 +774,19 @@ class DatabaseInteractions(SnapshotDataCache): or if the column does not exist. """ # Fetch the relevant rows from the cache or database - rows = self.get_or_fetch_rows(cache_name=cache_name, filter_vals=filter_vals) + rows = self.get_rows_from_datacache(cache_name=cache_name, filter_vals=[filter_vals]) if rows is not None and not rows.empty: if item_name not in rows.columns: raise ValueError(f"Column '{item_name}' does not exist in the cache '{cache_name}'.") # Return the specific item from the first matching row. return rows.iloc[0][item_name] - # If the item is not found, raise an error. + # If the item is not found, raise an error.todo do I want to raise an error or return empty? 
raise ValueError( f"Item '{item_name}' not found in cache or table '{cache_name}' where {filter_vals[0]} = {filter_vals[1]}") - def insert_row(self, cache_name: str, columns: tuple, values: tuple, key: str = None, - skip_cache: bool = False) -> None: + def insert_row_into_datacache(self, cache_name: str, columns: tuple, values: tuple, key: str = None, + skip_cache: bool = False) -> None: """ Inserts a single row into the specified cache and database, with an option to skip cache insertion. @@ -676,28 +799,16 @@ class DatabaseInteractions(SnapshotDataCache): # Insert the row into the database and fetch the auto-incremented ID auto_incremented_id = self.db.insert_row(table=cache_name, columns=columns, values=values) - if not skip_cache: - # Create a DataFrame for the new row - new_row_df = pd.DataFrame([values], columns=list(columns)) + if skip_cache: + return - # Use the auto-incremented ID as the key if none was provided - if key is None: - key = str(auto_incremented_id) + # Use the auto-incremented ID as the key if none was provided (for row-based caches) + if key is None: + key = str(auto_incremented_id) - # Check if there is already a cache item for this key - existing_data = self.get_cache_item(key=key, cache_name=cache_name) + self.insert_row_into_cache(cache_name, columns, values, key) - if existing_data is not None and isinstance(existing_data, pd.DataFrame): - # Append the new row to the existing DataFrame in the cache - combined_df = pd.concat([existing_data, new_row_df], ignore_index=True) - else: - # If no existing data, use the new DataFrame - combined_df = new_row_df - - # Set the combined DataFrame back into the cache - self.set_cache_item(cache_name=cache_name, key=key, data=combined_df) - - def insert_df(self, df: pd.DataFrame, cache_name: str, skip_cache: bool = False) -> None: + def insert_df_into_datacache(self, df: pd.DataFrame, cache_name: str, skip_cache: bool = False) -> None: """ Inserts data from a DataFrame into the specified cache and 
database, with an option to skip cache insertion. @@ -705,144 +816,89 @@ class DatabaseInteractions(SnapshotDataCache): :param cache_name: The name of the cache (and database table) where the data should be inserted. :param skip_cache: If True, skips inserting the data into the cache. Default is False. """ - # Insert the data into the database and fetch the auto-incremented ID - auto_incremented_id = self.db.insert_dataframe(df=df, table=cache_name) + # Insert the data into the database + self.db.insert_dataframe(df=df, table=cache_name) if not skip_cache: - # Use the auto-incremented ID as the key for the cache item - self.set_cache_item(cache_name=cache_name, key=str(auto_incremented_id), data=df) + self.insert_df_into_cache(df, cache_name) - def remove_row(self, cache_name: str, filter_vals: tuple[str, any], additional_filter: tuple[str, any] = None, - remove_from_db: bool = True) -> None: + def remove_row_from_datacache(self, cache_name: str, filter_vals: List[tuple[str, Any]], + remove_from_db: bool = True) -> None: """ - Removes a specific row from the cache and optionally from the database based on filter criteria. + Removes rows from the cache and optionally from the database based on multiple filter criteria. This method is specifically designed for caches stored as DataFrames. - :param cache_name: The name of the cache (or table) from which to remove the row. - :param filter_vals: A tuple containing the column name and the value to filter by. - :param additional_filter: An optional additional filter to apply. - :param remove_from_db: If True, also removes the row from the database. Default is True. - :raises ValueError: If the cache is not a DataFrame. + :param cache_name: The name of the cache (or table) from which to remove rows. + :param filter_vals: A list of tuples, each containing a column name and the value to filter by. + :param remove_from_db: If True, also removes the rows from the database. Default is True. 
+ :raises ValueError: If the cache is not a DataFrame or if no valid cache is found. """ - if cache_name not in self.caches: - raise ValueError(f"Cache '{cache_name}' does not exist.") - - # Retrieve the cache object - cache_obj = self.caches[cache_name] - - # Retrieve all items in the specified cache - cache_df = cache_obj.get_all_items() - - if not isinstance(cache_df, pd.DataFrame): - raise ValueError(f"Cache '{cache_name}' is not a DataFrame and cannot be used with remove_row.") - - # Apply filtering on the 'data' column - condition = cache_df['data'].apply(lambda df: df[filter_vals[0]].eq(filter_vals[1])).any(axis=1) - - # If an additional filter is provided, apply it - if additional_filter: - condition &= cache_df['data'].apply(lambda df: df[additional_filter[0]].eq(additional_filter[1])).any( - axis=1) - - # Filter the cache DataFrame to exclude the rows that match the condition - updated_cache_df = cache_df[~condition].reset_index(drop=True) - - # Update the cache with the modified DataFrame - cache_obj.cache = updated_cache_df + self.remove_row_from_cache(cache_name, filter_vals) + # Remove from the database if required if remove_from_db: - sql = f"DELETE FROM {cache_name} WHERE {filter_vals[0]} = ?" - params = [filter_vals[1]] + # Construct SQL to remove the rows from the database based on filter_vals + sql = f"DELETE FROM {cache_name} WHERE " + " AND ".join([f"{col} = ?" for col, _ in filter_vals]) + params = [val for _, val in filter_vals] - if additional_filter: - sql += f" AND {additional_filter[0]} = ?" - params.append(additional_filter[1]) + # Execute the SQL query to remove the row from the database + self.db.execute_sql(sql, params) - self.db.execute_sql(sql, tuple(params)) - - def is_attr_taken(self, cache_name: str, attr: str, val: any) -> bool: - """ - Checks if a specific attribute in any of the DataFrames stored within the cache - (which is stored as a DataFrame in the 'data' column) has the given value. 
- - :param cache_name: The key used to identify the cache (also the name of the database table). - :param attr: The attribute/column name to check (e.g., 'username', 'email'). - :param val: The value of the attribute to check. - :return: True if the attribute value is found in any of the DataFrames in the cache, False otherwise. - """ - # Retrieve all items in the cache - all_items_df = self.get_all_cache_items(cache_name) - - if all_items_df.empty: - return False - - # Concatenate all DataFrames stored in the 'data' column into a single DataFrame - combined_df = pd.concat(all_items_df['data'].tolist(), ignore_index=True) - - # Check if the combined DataFrame contains the attribute and if the value matches - if attr in combined_df.columns and not combined_df[combined_df[attr] == val].empty: - return True - - return False - - def modify_item(self, cache_name: str, filter_vals: tuple[str, any], field_name: str, new_data: any) -> None: + def modify_datacache_item(self, cache_name: str, filter_vals: List[Tuple[str, any]], field_name: str, + new_data: any) -> None: """ Modifies a specific field in a row within the cache and updates the database accordingly. :param cache_name: The name used to identify the cache (also the name of the database table). - :param filter_vals: A tuple containing the column name and the value to filter by. + :param filter_vals: A list of tuples containing column names and values to filter by. :param field_name: The field to be updated. :param new_data: The new data to be set. - :raises ValueError: If the row is not found in the cache or the database. + :raises ValueError: If the row is not found in the cache or the database, or if multiple rows are returned. 
""" # Retrieve the row from the cache or database - row = self.get_or_fetch_rows(cache_name=cache_name, filter_vals=filter_vals) + rows = self.get_rows_from_datacache(cache_name=cache_name, filter_vals=filter_vals) - if row is None or row.empty: - raise ValueError(f"Row not found in cache or database for {filter_vals[0]} = {filter_vals[1]}") + if rows is None or rows.empty: + raise ValueError(f"Row not found in cache or database for {filter_vals}") + + # Check if multiple rows are returned + if len(rows) > 1: + raise ValueError(f"Multiple rows found for {filter_vals}. Please provide a more specific filter.") # Modify the specified field if isinstance(new_data, str): updated_value = new_data else: - updated_value = json.dumps(new_data) # Convert non-string data to JSON string + updated_value = json.dumps(new_data) # Convert non-string data to JSON string if necessary - # Update the DataFrame - row[field_name] = updated_value + # Update the DataFrame with the new value + rows[field_name] = updated_value - # Set the updated row in the cache (this will replace the old entry) - self.set_cache_item(cache_name=cache_name, key=filter_vals[1], data=row) + # Get the cache instance + cache = self.get_cache(cache_name) - # Ensure the value is a scalar before passing it to the SQL query - update_query = f"UPDATE {cache_name} SET {field_name} = ? WHERE {filter_vals[0]} = ?" - self.db.execute_sql(update_query, (updated_value, filter_vals[1])) - - def update_cached_dict(self, cache_name: str, cache_key: str, dict_key: str, data: any) -> None: - """ - Updates a dictionary stored in the DataFrame cache. - - :param cache_name: The name of the cache that holds the dictionary. - :param cache_key: The key in the cache corresponding to the dictionary. - :param dict_key: The key within the dictionary to update. - :param data: The data to insert into the dictionary. 
- :return: None - """ - # Retrieve the item from the cache - cache_item = self.get_cache_item(key=cache_key, cache_name=cache_name) - - if cache_item is not None: - # Ensure the item is a dictionary - if isinstance(cache_item, dict): - # Update the dictionary with the new data - cache_item[dict_key] = data - - # Save the updated dictionary back into the cache - self.set_cache_item(cache_name=cache_name, key=cache_key, data=cache_item) - else: - raise ValueError(f"Expected a dictionary in cache, but found {type(cache_item)}.") + # Set the updated row in the cache + if isinstance(cache, RowBasedCache): + # For row-based cache, the 'key' must be in filter_vals + key_value = next((val for key, val in filter_vals if key == 'key'), None) + if key_value is None: + raise ValueError("'key' must be present in filter_vals for row-based caches.") + # Update the cache entry with the modified row + cache.add_entry(key=key_value, data=rows) + elif isinstance(cache, TableBasedCache): + # For table-based cache, use the existing query method to update the correct rows + cache.add_table(rows) else: - raise KeyError(f"Cache key '{cache_key}' not found in cache '{cache_name}'.") + raise ValueError(f"Unsupported cache type for {cache_name}") + + # Update the value in the database as well + sql_update = f"UPDATE {cache_name} SET {field_name} = ? " \ + f"WHERE {' AND '.join([f'{col} = ?' for col, _ in filter_vals])}" + params = [updated_value] + [val for _, val in filter_vals] + + # Execute the SQL update to modify the database + self.db.execute_sql(sql_update, params) class ServerInteractions(DatabaseInteractions): @@ -850,8 +906,20 @@ class ServerInteractions(DatabaseInteractions): Extends DataCache to specialize in handling candle (OHLC) data and server interactions. """ - def __init__(self, exchanges): - super().__init__(exchanges) + def __init__(self): + super().__init__() + # !SET THE MAXIMUM NUMBER OF MARKETS TO KEEP IN RAM HERE! 
+ self.exchanges = None + self.create_cache(name='candles', cache_type='row', default_expiration=dt.timedelta(days=5), + size_limit=100, eviction_policy='evict') + + def set_exchange(self, exchanges): + """ + Sets an exchange interface for this class to use. + :param exchanges: ExchangeInterface obj + :return: none. + """ + self.exchanges = exchanges @staticmethod def _make_key(ex_details: list[str]) -> str: @@ -868,16 +936,11 @@ class ServerInteractions(DatabaseInteractions): def _update_candle_cache(self, more_records: pd.DataFrame, key: str) -> None: logger.debug('Updating data with new records.') - existing_records = self.get_cache_item(cache_name='candles', key=key) - if existing_records is None or existing_records.empty: - existing_records = pd.DataFrame() + # Retrieve the 'candles' cache + candles = self.get_cache('candles') - records = pd.concat([existing_records, more_records], axis=0, ignore_index=True) - records = records.drop_duplicates(subset="time", keep='first') - records = records.sort_values(by='time').reset_index(drop=True) - records['id'] = range(1, len(records) + 1) - - self.set_cache_item(cache_name='candles', key=key, data=records) + # Store the updated records back in the cache + candles.add_entry(key=key, data=more_records) def get_records_since(self, start_datetime: dt.datetime, ex_details: list[str]) -> pd.DataFrame: """ @@ -996,7 +1059,7 @@ class ServerInteractions(DatabaseInteractions): key = self._make_key(ex_details=ex_details) logger.debug('Getting records from candles cache.') - df = self.get_cache_item(cache_name='candles', key=key) + df = self.get_cache('candles').get_entry(key=key) if df is None or df.empty: logger.debug("No cached records found.") return pd.DataFrame() @@ -1245,17 +1308,12 @@ class IndicatorCache(ServerInteractions): indicator_registry (dict): A dictionary mapping indicator types (e.g., 'SMA', 'EMA') to their classes. 
""" - def __init__(self, exchanges): + def __init__(self): """ Initialize the IndicatorCache with caches for indicators and their calculated data. - - :param exchanges: The exchange interfaces used for retrieving market data. """ - super().__init__(exchanges) - # Cache for storing instantiated indicator objects - self.create_cache('indicators', cache_type=InMemoryCache, limit=100, eviction_policy='evict') - # Cache for storing calculated indicator data - self.create_cache('indicator_data', cache_type=InMemoryCache, limit=500, eviction_policy='evict') + super().__init__() + # Registry of available indicators self.indicator_registry = indicators_registry @@ -1275,20 +1333,60 @@ class IndicatorCache(ServerInteractions): indicator_class = self.indicator_registry[indicator_type] return indicator_class(name=indicator_type, indicator_type=indicator_type, properties=properties) - def set_cache_item(self, key: str, data: Any, expire_delta: dt.timedelta = None, - do_not_overwrite: bool = False, cache_name: str = 'default_cache', - limit: int = None, eviction_policy: str = 'evict'): + def set_cache_item(self, cache_name: str, data: Any, key: str = None, + expire_delta: Optional[dt.timedelta] = None, + do_not_overwrite: bool = False): """ Stores an item in the cache, with custom serialization for Indicator instances. - Maintains the signature consistent with the base class. - """ - # Serialize Indicator instances using pickle - if isinstance(data, Indicator): - data = pickle.dumps(data) + Handles both row-based and table-based caches differently. - # Use the base class method for actual caching - super().set_cache_item(key, data, expire_delta=expire_delta, do_not_overwrite=do_not_overwrite, - cache_name=cache_name, limit=limit, eviction_policy=eviction_policy) + :param cache_name: The name of the cache. + :param data: The data to store in the cache. Can be a DataFrame or an Indicator instance. + :param key: The key for row-based caches, used to identify the entry. 
Required for row-based caches. + :param expire_delta: An optional expiration timedelta. If not provided, the cache's default expiration is used. + :param do_not_overwrite: If True, prevents overwriting existing entries in the cache. + """ + + # Convert expiration delta (if provided) to seconds + expiration_time = expire_delta.total_seconds() if expire_delta else None + + # Retrieve the specified cache by its name + cache = self.get_cache(cache_name) + + # Handle Row-Based Cache + if isinstance(cache, RowBasedCache): + if key is None: + raise ValueError("RowBasedCache requires a key to store the data.") + + # If the data is an Indicator instance, serialize it + if isinstance(data, Indicator): + data = pickle.dumps(data) + + # If overwrite is disabled and the key already exists, prevent overwrite + if do_not_overwrite and key in cache.cache: + logging.warning(f"Key '{key}' already exists in cache '{cache_name}'. Overwrite prevented.") + return + + # Add the entry to the row-based cache + cache.add_entry(key=key, data=data, expiration_time=expiration_time) + + # Handle Table-Based Cache (only accepts DataFrame) + elif isinstance(cache, TableBasedCache): + # Ensure data is a DataFrame, as only DataFrames are allowed in table-based caches + if isinstance(data, pd.DataFrame): + if do_not_overwrite: + existing_rows = cache.query([("key", key)]) + if not existing_rows.empty: + logging.warning( + f"Entry with key '{key}' already exists in cache '{cache_name}'. Overwrite prevented." + ) + return + # Add the DataFrame to the table-based cache + cache.add_table(df=data, expiration_time=expiration_time, key=key) + else: + raise ValueError("TableBasedCache can only store DataFrames.") + else: + raise ValueError(f"Unsupported cache type for '{cache_name}'") def get_cache_item(self, key: str, cache_name: str = 'default_cache') -> Any: """ @@ -1298,11 +1396,21 @@ class IndicatorCache(ServerInteractions): :param key: The key associated with the cache item. 
:return Any: The cached data, or None if the key does not exist or the item is expired. """ - data = super().get_cache_item(key, cache_name) + # Retrieve the cache instance + cache = self.get_cache(cache_name) - # If no data is found, return None - if data is None: - logging.info(f"No data found in cache for key: {key}") + # Handle different cache types + if isinstance(cache, RowBasedCache): + data = cache.get_entry(key=key) + elif isinstance(cache, TableBasedCache): + data = cache.query([('key', key)]) # Assuming 'key' is a valid query parameter + else: + logging.error(f"Unsupported cache type for '{cache_name}'") + return None + + # If no data is found, log and return None + if data is None or (isinstance(data, pd.DataFrame) and data.empty): + logging.info(f"No data found in cache '{cache_name}' for key: {key}") return None # Handle Indicator case (deserialize using pickle) @@ -1316,21 +1424,10 @@ class IndicatorCache(ServerInteractions): logging.warning(f"Expected Indicator instance, got {type(deserialized_data)}") return deserialized_data # Fallback: Return deserialized data even if it's not an Indicator except (pickle.PickleError, TypeError) as e: - logging.error(f"Deserialization failed for key {key}: {e}") + logging.error(f"Deserialization failed for key '{key}' in cache '{cache_name}': {e}") return None - # Handle list case - if isinstance(data, list): - logging.info(f"List data retrieved from cache for key: {key}") - return data - - # Handle DataFrame case - if isinstance(data, pd.DataFrame) and not data.empty: - logging.info(f"DataFrame retrieved from cache for key: {key}") - return data - - # Return the data as-is for any other type - logging.info(f"Data retrieved from cache for key: {key}") + logging.info(f"Data retrieved from cache '{cache_name}' for key: {key}") return data def set_user_indicator_properties(self, user_id: str, indicator_type: str, symbol: str, timeframe: str, @@ -1342,7 +1439,7 @@ class IndicatorCache(ServerInteractions): raise 
ValueError("display_properties must be a dictionary") user_cache_key = f"user_{user_id}_{indicator_type}_{symbol}_{timeframe}_{exchange_name}" - self.set_cache_item(user_cache_key, display_properties, cache_name='user_display_properties') + self.set_cache_item(key=user_cache_key, data=display_properties, cache_name='user_display_properties') def get_user_indicator_properties(self, user_id: str, indicator_type: str, symbol: str, timeframe: str, exchange_name: str) -> dict: @@ -1414,7 +1511,7 @@ class IndicatorCache(ServerInteractions): ) # Step 4: Cache the newly calculated data - self.set_cache_item(cache_key, calculated_data, cache_name='indicator_data') + self.set_cache_item(key=cache_key, data=calculated_data, cache_name='indicator_data') # Step 5: Retrieve and merge user-specific display properties with defaults merged_properties = self._get_merged_properties(user_name, indicator_type, symbol, timeframe, exchange_name, @@ -1630,6 +1727,6 @@ class DataCache(IndicatorCache): """ - def __init__(self, exchanges): - super().__init__(exchanges) + def __init__(self): + super().__init__() logger.info("DataCache initialized.") diff --git a/src/Database.py b/src/Database.py index c8d176f..17581c6 100644 --- a/src/Database.py +++ b/src/Database.py @@ -85,7 +85,7 @@ class Database: def __init__(self, db_file: str = None): self.db_file = db_file - def execute_sql(self, sql: str, params: tuple = ()) -> None: + def execute_sql(self, sql: str, params: list = None) -> None: """ Executes a raw SQL statement with optional parameters. @@ -115,22 +115,28 @@ class Database: error = f"Couldn't fetch item {item_name} from {table_name} where {filter_vals[0]} = {filter_vals[1]}" raise ValueError(error) - def get_rows_where(self, table: str, filter_vals: Tuple[str, Any]) -> pd.DataFrame | None: + def get_rows_where(self, table: str, filter_vals: List[Tuple[str, Any]]) -> pd.DataFrame | None: """ Returns a DataFrame containing all rows of a table that meet the filter criteria. 
:param table: Name of the table. - :param filter_vals: Tuple of column name and value to filter by. + :param filter_vals: List of tuples containing column names and values to filter by. :return: DataFrame of the query result or None if empty or column does not exist. """ try: with SQLite(self.db_file) as con: - qry = f"SELECT * FROM {table} WHERE {filter_vals[0]} = ?" - result = pd.read_sql(qry, con, params=(filter_vals[1],)) + # Construct the WHERE clause with multiple conditions + where_clause = " AND ".join([f"{col} = ?" for col, _ in filter_vals]) + params = [val for _, val in filter_vals] + + # Prepare and execute the query with the constructed WHERE clause + qry = f"SELECT * FROM {table} WHERE {where_clause}" + result = pd.read_sql(qry, con, params=params) + return result if not result.empty else None except (sqlite3.OperationalError, pd.errors.DatabaseError) as e: # Log the error or handle it appropriately - print(f"Error querying table '{table}' for column '{filter_vals[0]}': {e}") + print(f"Error querying table '{table}' with filters {filter_vals}: {e}") return None def insert_dataframe(self, df: pd.DataFrame, table: str) -> int: diff --git a/src/ExchangeInterface.py b/src/ExchangeInterface.py index 1603769..db48b53 100644 --- a/src/ExchangeInterface.py +++ b/src/ExchangeInterface.py @@ -1,8 +1,10 @@ import logging -from typing import List, Any, Dict +from typing import List, Any, Dict, TYPE_CHECKING import pandas as pd import ccxt from Exchange import Exchange +from DataCache_v3 import DataCache + logger = logging.getLogger(__name__) @@ -17,23 +19,38 @@ class ExchangeInterface: Connects, maintains, and routes data requests to/from multiple exchanges. 
""" - def __init__(self): - self.exchange_data = pd.DataFrame(columns=['user', 'name', 'reference', 'balances']) + def __init__(self, cache_manager: DataCache): + self.cache_manager = cache_manager + self.cache_manager.create_cache( + name='exchange_data', + cache_type='table', + size_limit=100, + eviction_policy='deny', + columns=['user', 'name', 'reference', 'balances'] + ) + self.available_exchanges = self.get_ccxt_exchanges() - # Create a default user and exchange for unsigned requests - default_ex_name = 'binance' - self.connect_exchange(exchange_name=default_ex_name, user_name='default') - self.default_exchange = self.get_exchange(ename=default_ex_name, uname='default') + self.default_ex_name = 'binance' + self.default_exchange = None - def get_ccxt_exchanges(self) -> List[str]: + def connect_default_exchange(self): + if self.default_exchange is not None: + return + # Create a default user and exchange for unsigned requests + self.connect_exchange(exchange_name=self.default_ex_name, user_name='default') + self.default_exchange = self.get_exchange(ename=self.default_ex_name, uname='default') + + @staticmethod + def get_ccxt_exchanges() -> List[str]: """Retrieve the list of available exchanges from CCXT.""" return ccxt.exchanges - def get_public_exchanges(self) -> List[str]: + @staticmethod + def get_public_exchanges() -> List[str]: """Return a list of public exchanges available from CCXT.""" public_list = [] - file_path = 'src\working_public_exchanges.txt' + file_path = r"src\working_public_exchanges.txt" try: with open(file_path, 'r') as file: @@ -70,8 +87,12 @@ class ExchangeInterface: :param exchange: The Exchange object to add. 
""" try: - row = {'user': user_name, 'name': exchange.name, 'reference': exchange, 'balances': exchange.balances} - self.exchange_data = add_row(self.exchange_data, row) + row = pd.DataFrame([{ + 'user': user_name, 'name': exchange.name, + 'reference': exchange, 'balances': exchange.balances}]) + + cache = self.cache_manager.get_cache('exchange_data') + cache.add_table(df=row) except Exception as e: logger.error(f"Couldn't create an instance of the exchange! {str(e)}") raise @@ -87,7 +108,9 @@ class ExchangeInterface: if not ename or not uname: raise ValueError('Missing argument!') - exchange_data = self.exchange_data.query("name == @ename and user == @uname") + cache = self.cache_manager.get_cache('exchange_data') + exchange_data = cache.query([('name', ename), ('user', uname)]) + if exchange_data.empty: raise ValueError('No matching exchange found.') @@ -100,7 +123,9 @@ class ExchangeInterface: :param user_name: The name of the user. :return: A list of connected exchange names. """ - return self.exchange_data.loc[self.exchange_data['user'] == user_name, 'name'].tolist() + cache = self.cache_manager.get_cache('exchange_data') + exchanges = cache.query([('user', user_name)]) + return exchanges['name'].tolist() def get_available_exchanges(self) -> List[str]: """Get a list of available exchanges.""" @@ -114,9 +139,10 @@ class ExchangeInterface: :param name: The name of the exchange. :return: A Series containing the balances. """ - filtered_data = self.exchange_data.query("user == @user_name and name == @name") - if not filtered_data.empty: - return filtered_data.iloc[0]['balances'] + cache = self.cache_manager.get_cache('exchange_data') + exchange = cache.query([('user', user_name), ('name', name)]) + if not exchange.empty: + return exchange.iloc[0]['balances'] else: return pd.Series(dtype='object') # Return an empty Series if no match is found @@ -127,12 +153,15 @@ class ExchangeInterface: :param user_name: The name of the user. 
:return: A dictionary containing the balances of all connected exchanges. """ - filtered_data = self.exchange_data.loc[self.exchange_data['user'] == user_name, ['name', 'balances']] - if filtered_data.empty: - return {} + # Query exchange data for the given user + cache = self.cache_manager.get_cache('exchange_data') + exchanges = cache.query([('user', user_name)]) - balances_dict = {row['name']: row['balances'] for _, row in filtered_data.iterrows()} - return balances_dict + # Select 'name' and 'balances' columns for all rows + filtered_data = exchanges.loc[:, ['name', 'balances']] + + # Return a dictionary where exchange 'name' is the key and 'balances' is the value + return {row['name']: row['balances'] for _, row in filtered_data.iterrows()} def get_all_activated(self, user_name: str, fetch_type: str = 'trades') -> Dict[str, List[Dict[str, Any]]]: """ @@ -142,16 +171,24 @@ class ExchangeInterface: :param fetch_type: The type of data to fetch ('trades' or 'orders'). :return: A dictionary indexed by exchange name with lists of active trades or open orders. """ - filtered_data = self.exchange_data.loc[self.exchange_data['user'] == user_name, ['name', 'reference']] + cache = self.cache_manager.get_cache('exchange_data') + exchanges = cache.query([('user', user_name)]) + + # Select the 'name' and 'reference' columns + filtered_data = exchanges.loc[:, ['name', 'reference']] + if filtered_data.empty: return {} data_dict = {} + + # Iterate over the filtered data for name, reference in filtered_data.itertuples(index=False): if pd.isna(reference): continue try: + # Fetch active trades or open orders based on the fetch_type if fetch_type == 'trades': data = reference.get_active_trades() elif fetch_type == 'orders': @@ -222,6 +259,7 @@ class ExchangeInterface: :return: The current price. 
""" if price_source is None: + self.connect_default_exchange() return self.default_exchange.get_price(symbol=symbol) else: raise ValueError(f'No implementation for price source: {price_source}') diff --git a/src/Users.py b/src/Users.py index 570d14c..7e8ecbf 100644 --- a/src/Users.py +++ b/src/Users.py @@ -2,9 +2,9 @@ import copy import datetime as dt import json import random -from typing import Any from passlib.hash import bcrypt import pandas as pd +from typing import Any from DataCache_v3 import DataCache @@ -21,6 +21,16 @@ class BaseUser: :param data_cache: Object responsible for managing cached data and database interaction. """ self.data = data_cache + # Create a table-based cache with specified columns + self.data.create_cache(name='users', + cache_type='table', + size_limit=100, + eviction_policy='deny', + default_expiration=dt.timedelta(hours=24), + columns=["id", "user_name", "status", "chart_views", "email", + "active_exchanges", "configured_exchanges", "password", + "api_keys", "signin_time", "active_indicators"] + ) def get_id(self, user_name: str) -> int: """ @@ -29,7 +39,7 @@ class BaseUser: :param user_name: The name of the user. :return: The ID of the user as an integer. """ - return self.data.fetch_item( + return self.data.fetch_datacache_item( item_name='id', cache_name='users', filter_vals=('user_name', user_name) @@ -42,7 +52,7 @@ class BaseUser: :param id: The id of the user. :return: The name of the user as a str. """ - return self.data.fetch_item( + return self.data.fetch_datacache_item( item_name='user_name', cache_name='users', filter_vals=('id', id) @@ -55,10 +65,9 @@ class BaseUser: :param user_name: The name of the user to remove from the cache. 
""" # Remove the user from the cache only - self.data.remove_row( - cache_name='users', - filter_vals=('user_name', user_name), remove_from_db=False - ) + self.data.remove_row_from_datacache(cache_name='users', + filter_vals=[('user_name', user_name)], + remove_from_db=False) def delete_user(self, user_name: str) -> None: """ @@ -66,10 +75,8 @@ class BaseUser: :param user_name: The name of the user to delete. """ - self.data.remove_row( - filter_vals=('user_name', user_name), - cache_name='users' - ) + self.data.remove_row_from_datacache(filter_vals=[('user_name', user_name)], + cache_name='users') def get_user_data(self, user_name: str) -> pd.DataFrame | None: """ @@ -81,10 +88,8 @@ class BaseUser: :raises ValueError: If the user is not found in both the cache and the database. """ # Attempt to fetch the user data from the cache or database via DataCache - user = self.data.get_or_fetch_rows( - cache_name='users', - filter_vals=('user_name', user_name) - ) + user = self.data.get_rows_from_datacache( + cache_name='users', filter_vals=[('user_name', user_name)]) if user is None or user.empty: raise ValueError(f"User '{user_name}' not found in database or cache!") @@ -100,9 +105,9 @@ class BaseUser: :param new_data: The new data to be set. """ # Use DataCache to modify the user's data - self.data.modify_item( + self.data.modify_datacache_item( cache_name='users', - filter_vals=('user_name', username), + filter_vals=[('user_name', username)], field_name=field_name, new_data=new_data ) @@ -154,7 +159,7 @@ class UserAccountManagement(BaseUser): :return: True if the password is correct, False otherwise. 
""" # Retrieve the hashed password using DataCache - user_data = self.data.get_or_fetch_rows(cache_name='users', filter_vals=('user_name', username)) + user_data = self.data.get_rows_from_datacache(cache_name='users', filter_vals=[('user_name', username)]) if user_data is None or user_data.empty: return False @@ -238,13 +243,13 @@ class UserAccountManagement(BaseUser): def user_attr_is_taken(self, attr: str, val: str) -> bool: """ Checks if a specific user attribute (e.g., username, email) is already taken. - :param attr: The attribute to check (e.g., 'user_name', 'email'). :param val: The value of the attribute to check. :return: True if the attribute is already taken, False otherwise. """ # Use DataCache to check if the attribute is taken - return self.data.is_attr_taken(cache_name='users', attr=attr, val=val) + user_cache = self.data.get_rows_from_datacache('users', [(attr, val)]) + return True if not user_cache.empty else False def create_unique_guest_name(self) -> str | None: """ @@ -262,7 +267,7 @@ class UserAccountManagement(BaseUser): username = f'guest_{suffix}' # Check if the username already exists in the database - if not self.data.get_or_fetch_rows(cache_name='users', filter_vals=('user_name', username)): + if not self.data.get_rows_from_datacache(cache_name='users', filter_vals=[('user_name', username)]): return username attempts += 1 @@ -298,7 +303,7 @@ class UserAccountManagement(BaseUser): raise ValueError("Attributes must be a tuple of single key-value pair dictionaries.") # Retrieve the default user template from the database using DataCache - default_user = self.data.get_or_fetch_rows(cache_name='users', filter_vals=('user_name', 'guest')) + default_user = self.data.get_rows_from_datacache(cache_name='users', filter_vals=[('user_name', 'guest')]) if default_user is None or default_user.empty: raise ValueError("Default user template not found in the database.") @@ -314,8 +319,10 @@ class UserAccountManagement(BaseUser): # Remove the 'id' column 
before inserting into the database new_user = new_user.drop(columns='id') - # Insert the modified user data into the database, skipping cache insertion - self.data.insert_df(df=new_user, cache_name="users", skip_cache=True) + # Insert the modified user as a single row, skipping cache + columns = tuple(new_user.columns) + values = tuple(new_user.iloc[0]) + self.data.insert_row_into_datacache(cache_name="users", columns=columns, values=values, skip_cache=True) def create_new_user(self, username: str, email: str, password: str) -> bool: """ @@ -464,7 +471,7 @@ class UserIndicatorManagement(UserExchangeManagement): user_id = int(self.get_id(user_name)) # Fetch the indicators from the database using DataCache - df = self.data.get_or_fetch_rows(cache_name='indicators', filter_vals=('creator', user_id)) + df = self.data.get_rows_from_datacache(cache_name='indicators', filter_vals=[('creator', user_id)]) # If indicators are found, process the JSON fields if df is not None and not df.empty: @@ -492,25 +499,11 @@ class UserIndicatorManagement(UserExchangeManagement): columns = ('creator', 'name', 'visible', 'kind', 'source', 'properties') # Insert the row into the database and cache using DataCache - self.data.insert_row(cache_name='indicators', columns=columns, values=values) + self.data.insert_row_into_datacache(cache_name='indicators', columns=columns, values=values) except Exception as e: print(f"Error saving indicator {indicator['name']} for creator {indicator['creator']}: {str(e)}") - def remove_indicator(self, indicator_name: str, user_name: str) -> None: - """ - Removes a specific indicator from the database and cache. - - :param indicator_name: The name of the indicator to remove. - :param user_name: The name of the user who created the indicator. 
- """ - user_id = int(self.get_id(user_name)) - self.data.remove_row( - filter_vals=('name', indicator_name), - additional_filter=('creator', user_id), - cache_name='indicators' - ) - def get_chart_view(self, user_name: str, prop: str | None = None): """ Fetches the chart view or one specific property of it for a specific user. diff --git a/src/indicators.py b/src/indicators.py index 543a3bc..1dcd279 100644 --- a/src/indicators.py +++ b/src/indicators.py @@ -1,10 +1,10 @@ import json import random from typing import Any, Optional, Dict - import numpy as np import pandas as pd import talib +import datetime as dt # A dictionary to hold both indicator types and their corresponding classes. indicators_registry = {} @@ -255,16 +255,35 @@ indicators_registry['MACD'] = MACD class Indicators: - def __init__(self, candles, users): + def __init__(self, candles, users, cache_manager): # Object manages and serves price and candle data. self.candles = candles # A connection to an object that handles user data. self.users = users - # Collection of instantiated indicators objects - self.indicators = pd.DataFrame(columns=['creator', 'name', 'visible', - 'kind', 'source', 'properties', 'ref']) + # A connection to an object that handles all data. 
+ self.cache_manager = cache_manager + + # Cache for storing instantiated indicator objects + cache_manager.create_cache( + name='indicators', + cache_type='table', + size_limit=100, + eviction_policy='deny', + default_expiration=dt.timedelta(days=1), + columns=['creator', 'name', 'visible', 'kind', 'source', 'properties', 'ref'] + ) + + # Cache for storing calculated indicator data + cache_manager.create_cache('indicator_data', cache_type='row', size_limit=100, + default_expiration=dt.timedelta(days=7), eviction_policy='evict') + + # Cache for storing display properties indicators + cache_manager.create_cache('user_display_properties', cache_type='row', + size_limit=100, + default_expiration=dt.timedelta(days=1), + eviction_policy='evict') # Available indicator types and classes from a global indicators_registry. self.indicator_registry = indicators_registry @@ -341,27 +360,34 @@ class Indicators: :return: dict - A dictionary of indicator names as keys and their attributes as values. """ user_id = self.users.get_id(username) + if not user_id: raise ValueError(f"Invalid user_name: {username}") + # Fetch indicators based on visibility status if only_enabled: - indicators_df = self.indicators.query("creator == @user_id and visible == 1") + indicators_df = self.cache_manager.get_rows_from_datacache('indicators', [('creator', user_id), ('visible', 1)]) else: - indicators_df = self.indicators.query('creator == @user_id') + indicators_df = self.cache_manager.get_rows_from_datacache('indicators', [('creator', user_id)]) - if indicators_df.empty: - # Attempt to load from storage. 
- self.load_indicators(user_name=username) - indicators_df = self.indicators.query('creator == @user_id') + # Check if the DataFrame is empty + if indicators_df is None or indicators_df.empty: + return {} # Return an empty dictionary if no indicators are found - # Create the dictionary result = {} + + # Iterate over the rows and construct the result dictionary for _, row in indicators_df.iterrows(): - # Include all properties from the properties dictionary, not just a limited subset. + # Ensure that row['properties'] is a dictionary + properties = row.get('properties', {}) + if not isinstance(properties, dict): + properties = {} + + # Construct the result dictionary for each indicator result[row['name']] = { 'type': row['kind'], 'visible': row['visible'], - **row['properties'] # This will include all properties in the dictionary + **properties # Merge in all properties from the properties field } return result @@ -374,21 +400,21 @@ class Indicators: :param indicator_names: List of indicator names to set as visible. :return: None """ + indicators = self.cache_manager.get_rows_from_datacache('indicators', [('creator', user_id)]) # Validate inputs - if user_id not in self.indicators['creator'].unique(): - # raise ValueError(f"Invalid user_name: {user_name}") - # Nothing may be loaded. 
+ if indicators.empty: return - # Set visibility for all indicators of the user - self.indicators.loc[self.indicators['creator'] == user_id, 'visible'] = 0 - # Set visibility for the specified indicator names - self.indicators.loc[self.indicators['name'].isin(indicator_names), 'visible'] = 1 + # Set visibility for all indicators off + self.cache_manager.modify_datacache_item('indicators', [('creator', user_id)], field_name='visible', new_data=0) + + # Set visibility for the specified indicators on + self.cache_manager.modify_datacache_item('indicators', [('creator', user_id), ('name', indicator_names)], + field_name='visible', new_data=1) def edit_indicator(self, user_name: str, params: dict): """ Edits an existing indicator's properties. - :param user_name: The name of the user. :param params: The updated properties of the indicator. """ @@ -398,33 +424,15 @@ class Indicators: # Get the indicator from the user's indicator list user_id = self.users.get_id(user_name) - indicator_row = self.indicators.query('name == @indicator_name and creator == @user_id') + indicator = self.cache_manager.get_rows_from_datacache('indicators', [('name', indicator_name), ('creator', user_id)]) - if indicator_row.empty: + if indicator.empty: raise ValueError(f"Indicator '{indicator_name}' not found for user '{user_name}'.") - # Update the top-level fields - top_level_keys = ['name', 'visible', 'kind'] # Top-level keys, expand this if needed - for key, value in params.items(): - if key in top_level_keys and key in indicator_row.columns: - self.indicators.at[indicator_row.index[0], key] = value - - # Update 'source' dictionary fields - if 'source' in indicator_row.columns and isinstance(indicator_row['source'].iloc[0], dict): - source_dict = indicator_row['source'].iloc[0] # Direct reference, no need for reassignment later - for key, value in params.items(): - if key in source_dict: - source_dict[key] = value - - # Update 'properties' dictionary fields - if 'properties' in 
indicator_row.columns and isinstance(indicator_row['properties'].iloc[0], dict): - properties_dict = indicator_row['properties'].iloc[0] # No copy, modify directly - for key, value in params.items(): - if key in properties_dict: - properties_dict[key] = value - - # Save the updated indicator for the user in the database. - self.users.save_indicators(indicator_row) + # Modify indicator. + self.cache_manager.modify_datacache_item('indicators', + [('creator', params.get('user_name')), ('name', params.get('name'))], + field_name=params.get('setting'), new_data=params.get('value')) def new_indicator(self, user_name: str, params) -> None: """ @@ -457,86 +465,93 @@ class Indicators: # Create indicator. self.create_indicator(creator=user_name, name=indcr, kind=indtyp, source=source, properties=properties) - # Update the watch-list in config. - self.save_indicator(self.indicators.loc[self.indicators.name == indcr]) def process_indicator(self, indicator, num_results: int = 1) -> pd.DataFrame | None: """ - Trigger execution of the indicator's analysis against an updated source. + Trigger execution of the indicator's analysis against an updated source. - :param indicator: A named tuple containing indicator data. - :param num_results: The number of results being requested. - :return: The results of the indicator analysis as a DataFrame. + :param indicator: A named tuple or dict containing indicator data. + :param num_results: The number of results being requested. + :return: The results of the indicator analysis as a DataFrame. """ username = self.users.get_username(indicator.creator) src = indicator.source symbol, timeframe, exchange_name = src['symbol'], src['timeframe'], src['exchange_name'] + # Retrieve necessary details to instantiate the indicator + name = indicator.name + kind = indicator.kind + properties = json.loads(indicator.properties) + # Adjust num_results to account for the lookup period if specified in the indicator properties. 
- if 'period' in indicator.ref.properties: - num_results += indicator.ref.properties['period'] + if 'period' in properties: + num_results += properties['period'] # Request the data from the defined source. data = self.candles.get_last_n_candles(num_candles=num_results, asset=symbol, timeframe=timeframe, exchange=exchange_name, user_name=username) - # Calculate the indicator using the retrieved data. - return indicator.ref.calculate(candles=data, user_name=username, num_results=num_results) + # Instantiate the indicator object based on the kind + indicator_class = self.indicator_registry[kind] + indicator_obj = indicator_class(name=name, indicator_type=kind, properties=properties) + + # Run the calculate method of the indicator + return indicator_obj.calculate(candles=data, user_name=username, num_results=num_results) def get_indicator_data(self, user_name: str, source: dict = None, visible_only: bool = True, start_ts: float = None, num_results: int = 1000) -> Optional[Dict[str, Any]]: """ - Loop through enabled indicators in a user's watch-list. Run the appropriate - update function and return a dictionary containing all the results. + Loop through enabled indicators in a user's watch-list. Run the appropriate + update function and return a dictionary containing all the results. - :param user_name: The name of the user making the request. - :param source: Pass in a source definition to return only results against a particular source. - :param visible_only: Returns only results marked visible. - :param start_ts: The timestamp to begin the analysis at. (Not implemented yet) - :param num_results: The number of results requested. - :return: A dictionary of timestamped data returned from each indicator indexed by the indicator's name, - or None if no indicators matched the query. + :param user_name: The name of the user making the request. + :param source: Pass in a source definition to return only results against a particular source. 
+ :param visible_only: Returns only results marked visible. + :param start_ts: The timestamp to begin the analysis at. (Not implemented yet) + :param num_results: The number of results requested. + :return: A dictionary of timestamped data returned from each indicator indexed by the indicator's name, + or None if no indicators matched the query. """ if start_ts: - print("Warning: start_ts has not implemented in get_indicator_data()!") + print("Warning: start_ts has not been implemented in get_indicator_data()!") user_id = self.users.get_id(user_name=user_name) - # Construct the query based on user_name and visibility. - query = f"creator == {user_id}" - if visible_only: - query += " and visible == 1" + visible = 1 if visible_only else 0 # Filter the indicators based on the query. - indicators = self.indicators.loc[ - (self.indicators['creator'] == user_id) & (self.indicators['visible'] == 1)] + indicators = self.cache_manager.get_rows_from_datacache('indicators', [('creator', user_id), ('visible', visible)]) # Return None if no indicators matched the query. if indicators.empty: - # Attempt to re-load from db - self.load_indicators(user_name=user_name) - # query again. - indicators = self.indicators.loc[ - (self.indicators['creator'] == user_id) & (self.indicators['visible'] == 1)] - if indicators.empty: - return None + return None if source: - # Filter indicators by these source parameters. 
- if 'market' in source: - symbol = source['market']['market'] - timeframe = source['market']['timeframe'] - exchange = source['market']['exchange'] - indicators = indicators[indicators.source.apply(lambda x: x['symbol'] == symbol and - x['timeframe'] == timeframe and - x['exchange_name'] == exchange)] - else: - raise ValueError(f'No implementation for source: {source}') + # Convert 'source' column to dictionaries if they are strings + indicators['source'] = indicators['source'].apply(lambda x: json.loads(x) if isinstance(x, str) else x) + + # Extract relevant fields from the source's market + source_timeframe = source.get('market', {}).get('timeframe') + source_exchange = source.get('market', {}).get('exchange') + source_symbol = source.get('market', {}).get('market') + + # Extract fields from indicators['source'] and compare directly + mask = (indicators['source'].apply(lambda s: s.get('timeframe')) == source_timeframe) & \ + (indicators['source'].apply(lambda s: s.get('exchange_name')) == source_exchange) & \ + (indicators['source'].apply(lambda s: s.get('symbol')) == source_symbol) + + # Filter the DataFrame using the mask + filtered_indicators = indicators[mask] + + # If no indicators match the filtered source, return None. 
+ if indicators.empty: + return None # Process each indicator, convert DataFrame to JSON-serializable format, and collect the results json_ready_results = {} + for indicator in indicators.itertuples(index=False): indicator_results = self.process_indicator(indicator=indicator, num_results=num_results) @@ -561,12 +576,8 @@ class Indicators: # Get the user ID to filter the indicators belonging to the user user_id = self.users.get_id(user_name) - # Remove the indicator from the DataFrame where the name matches and the creator is the user - self.indicators = self.indicators[ - ~((self.indicators['name'] == indicator_name) & (self.indicators['creator'] == user_id)) - ] - - self.users.remove_indicator(indicator_name=indicator_name, user_name=user_name) + identifying_values = [('name', indicator_name), ('creator', user_id)] + self.cache_manager.remove_row_from_datacache(cache_name='indicators', filter_vals=identifying_values) def create_indicator(self, creator: str, name: str, kind: str, source: dict, properties: dict, visible: bool = True): @@ -583,36 +594,29 @@ class Indicators: :param visible: Whether to display it in the chart view. :return: None """ - # Todo: Possible refactor to save without storing the indicator instance - self.indicators = self.indicators.reset_index(drop=True) creator_id = self.users.get_id(creator) # Check if an indicator with the same name already exists - existing_indicator = self.indicators.query('name == @name and creator == @creator_id') + indicators = self.cache_manager.get_rows_from_datacache('indicators', [('name', name), ('creator', creator_id)]) - if not existing_indicator.empty: + if not indicators.empty: print(f"Indicator '{name}' already exists for user '{creator}'. 
Skipping creation.") return # Exit the method to prevent duplicate creation if kind not in self.indicator_registry: raise ValueError(f"Requested an unsupported type of indicator: ({kind})") - indicator_class = self.indicator_registry[kind] - # Create an instance of the indicator. - indicator = indicator_class(name, kind, properties) - # Add the new indicator to a pandas dataframe. creator_id = self.users.get_id(creator) - row_data = { + row_data = pd.DataFrame([{ 'creator': creator_id, 'name': name, 'kind': kind, 'visible': visible, 'source': source, - 'properties': properties, - 'ref': indicator - } - self.indicators = pd.concat([self.indicators, pd.DataFrame([row_data])], ignore_index=True) + 'properties': properties + }]) + self.cache_manager.insert_df_into_datacache(df=row_data, cache_name="users", skip_cache=False) # def update_indicators(self, user_name): # """ diff --git a/tests/test_DataCache.py b/tests/test_DataCache.py index ae0e9c2..8353165 100644 --- a/tests/test_DataCache.py +++ b/tests/test_DataCache.py @@ -1,8 +1,10 @@ import pickle import time import pytz -from DataCache_v3 import DataCache, timeframe_to_timedelta, estimate_record_count, InMemoryCache, DataCacheBase, \ - SnapshotDataCache, IndicatorCache + + +from DataCache_v3 import DataCache, timeframe_to_timedelta, estimate_record_count, \ + SnapshotDataCache, CacheManager, RowBasedCache, TableBasedCache from ExchangeInterface import ExchangeInterface import unittest import pandas as pd @@ -202,278 +204,759 @@ class DataGenerator: class TestDataCache(unittest.TestCase): def setUp(self): - # Set up database and exchanges + # Initialize DataCache self.exchanges = ExchangeInterface() - self.exchanges.connect_exchange(exchange_name='binance', user_name='test_guy', api_keys=None) - self.exchanges.connect_exchange(exchange_name='binance', user_name='user_1', api_keys=None) - self.exchanges.connect_exchange(exchange_name='binance', user_name='user_2', api_keys=None) - self.db_file = 'test_db.sqlite' - 
self.database = Database(db_file=self.db_file) - - # Create necessary tables - sql_create_table_1 = f""" - CREATE TABLE IF NOT EXISTS test_table ( - id INTEGER PRIMARY KEY, - market_id INTEGER, - time INTEGER UNIQUE ON CONFLICT IGNORE, - open REAL NOT NULL, - high REAL NOT NULL, - low REAL NOT NULL, - close REAL NOT NULL, - volume REAL NOT NULL, - FOREIGN KEY (market_id) REFERENCES market (id) - )""" - sql_create_table_2 = """ - CREATE TABLE IF NOT EXISTS exchange ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT UNIQUE - )""" - sql_create_table_3 = """ - CREATE TABLE IF NOT EXISTS markets ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - symbol TEXT, - exchange_id INTEGER, - FOREIGN KEY (exchange_id) REFERENCES exchange(id) - )""" - sql_create_table_4 = f""" - CREATE TABLE IF NOT EXISTS test_table_2 ( - key TEXT PRIMARY KEY, - data TEXT NOT NULL - )""" - sql_create_table_5 = """ - CREATE TABLE IF NOT EXISTS users ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - user_name TEXT, - age INTEGER, - users_data TEXT, - data TEXT, - password TEXT -- Moved to a new line and added a comma after 'data' - ) - """ - - with SQLite(db_file=self.db_file) as con: - con.execute(sql_create_table_1) - con.execute(sql_create_table_2) - con.execute(sql_create_table_3) - con.execute(sql_create_table_4) - con.execute(sql_create_table_5) - - # Initialize DataCache, which inherits IndicatorCache self.data = DataCache(self.exchanges) - self.data.db = self.database # Keep the database setup + self.exchanges_connected = False + self.database_is_setup = False + self.test_data_loaded = False + + self.load_prerequisites() + + def tearDown(self): + if self.database_is_setup: + if os.path.exists(self.db_file): + os.remove(self.db_file) + + def load_prerequisites(self): + self.connect_exchanges() + self.set_up_database() + self.load_test_data() + + def load_test_data(self): + if self.test_data_loaded: + return # Create caches needed for testing - self.data.create_cache('candles', cache_type=InMemoryCache) 
+ self.data.create_cache('candles', cache_type='row') # Reuse details for exchange and market self.ex_details = ['BTC/USD', '2h', 'binance', 'test_guy'] self.key = f'{self.ex_details[0]}_{self.ex_details[1]}_{self.ex_details[2]}' - def tearDown(self): - if os.path.exists(self.db_file): - os.remove(self.db_file) + self.test_data_loaded = True - def test_InMemoryCache(self): - # Step 1: Create a cache with a limit of 2 items and 'evict' policy - print("Creating a cache with a limit of 2 items and 'evict' policy.") - cached_users = InMemoryCache(limit=2, eviction_policy='evict') + def connect_exchanges(self): + if not self.exchanges_connected: + self.exchanges.connect_exchange(exchange_name='binance', user_name='test_guy', api_keys=None) + self.exchanges.connect_exchange(exchange_name='binance', user_name='user_1', api_keys=None) + self.exchanges.connect_exchange(exchange_name='binance', user_name='user_2', api_keys=None) + self.exchanges_connected = True - # Step 2: Set some items in the cache. 
- print("Setting 'user_bob' in the cache with an expiration of 10 seconds.") - cached_users.set_item("user_bob", "{password:'BobPass'}", expire_delta=dt.timedelta(seconds=10)) + def set_up_database(self): + if not self.database_is_setup: + self.db_file = 'test_db.sqlite' + self.database = Database(db_file=self.db_file) - print("Setting 'user_alice' in the cache with an expiration of 20 seconds.") - cached_users.set_item("user_alice", "{password:'AlicePass'}", expire_delta=dt.timedelta(seconds=20)) + # Create necessary tables + sql_create_table_1 = f""" + CREATE TABLE IF NOT EXISTS test_table ( + id INTEGER PRIMARY KEY, + market_id INTEGER, + time INTEGER UNIQUE ON CONFLICT IGNORE, + open REAL NOT NULL, + high REAL NOT NULL, + low REAL NOT NULL, + close REAL NOT NULL, + volume REAL NOT NULL, + FOREIGN KEY (market_id) REFERENCES market (id) + )""" + sql_create_table_2 = """ + CREATE TABLE IF NOT EXISTS exchange ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT UNIQUE + )""" + sql_create_table_3 = """ + CREATE TABLE IF NOT EXISTS markets ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + symbol TEXT, + exchange_id INTEGER, + FOREIGN KEY (exchange_id) REFERENCES exchange(id) + )""" + sql_create_table_4 = f""" + CREATE TABLE IF NOT EXISTS test_table_2 ( + key TEXT PRIMARY KEY, + data TEXT NOT NULL + )""" + sql_create_table_5 = """ + CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_name TEXT, + age INTEGER, + users_data TEXT, + data TEXT, + password TEXT -- Moved to a new line and added a comma after 'data' + ) + """ - # Step 3: Retrieve 'user_bob' from the cache - print("Retrieving 'user_bob' from the cache.") - retrieved_item = cached_users.get_item('user_bob') - print(f"Retrieved: {retrieved_item}") - assert retrieved_item == "{password:'BobPass'}", "user_bob should have been retrieved successfully." 
+ with SQLite(db_file=self.db_file) as con: + con.execute(sql_create_table_1) + con.execute(sql_create_table_2) + con.execute(sql_create_table_3) + con.execute(sql_create_table_4) + con.execute(sql_create_table_5) - # Step 4: Add another item, causing the oldest item to be evicted - print("Adding 'user_billy' to the cache, which should evict 'user_bob' due to the limit.") - cached_users.set_item("user_billy", "{password:'BillyPass'}") + self.data.db = self.database # Keep the database setup + self.database_is_setup = True - # Step 5: Attempt to retrieve the evicted item 'user_bob' - print("Attempting to retrieve the evicted item 'user_bob'.") - evicted_item = cached_users.get_item('user_bob') - print(f"Evicted Item: {evicted_item}") - assert evicted_item is None, "user_bob should have been evicted from the cache." + def test_cache_system(self): + print("\n---- Starting Cache System Test ----") - # Step 6: Retrieve the current items in the cache - print("Retrieving all current items in the cache after eviction.") - all_items = cached_users.get_all_items() - print("Current items in cache:\n", all_items) - assert "user_alice" in all_items['key'].values, "user_alice should still be in the cache." - assert "user_billy" in all_items['key'].values, "user_billy should still be in the cache." + # Step 1: Create CacheManager instance + cache_manager = CacheManager() + print("\n1. Created CacheManager instance.") - # Step 7: Simulate waiting for 'user_alice' to expire (assuming 20 seconds pass) - print("Simulating time passing to expire 'user_alice' (20 seconds).") - time.sleep(20) # This is to simulate the passage of time; in real tests, you may mock datetime. + # Step 2: Create a Table-Based Cache for users + cache_manager.create_cache(name="users", cache_type="table", + default_expiration=dt.timedelta(seconds=2), size_limit=5) + table_cache = cache_manager.get_cache("users") + print("\n2. 
Created a Table-Based Cache for 'users' with expiration set to 2 seconds.") - # Step 8: Clean expired items from the cache - print("Cleaning expired items from the cache.") - cached_users.clean_expired_items() + # Step 3: Add users DataFrame to the table-based cache + users_df = pd.DataFrame([ + {'name': 'Bob', 'age': 25, 'email': 'bob@example.com'}, + {'name': 'Alice', 'age': 30, 'email': 'alice@example.com'} + ]) + table_cache.add_table(users_df) + print("\n3. Added user data to 'users' table-based cache:") + print(users_df) - # Step 9: Retrieve the current items in the cache after cleaning expired items - print("Retrieving all current items in the cache after cleaning expired items.") - all_items_after_cleaning = cached_users.get_all_items() - print("Current items in cache after cleaning:\n", all_items_after_cleaning) - assert "user_alice" not in all_items_after_cleaning[ - 'key'].values, "user_alice should have been expired and removed from the cache." - assert "user_billy" in all_items_after_cleaning['key'].values, "user_billy should still be in the cache." + # Step 4: Query the cache to retrieve Bob's information + result = table_cache.query([('name', 'Bob')]) + print("\n4. Queried 'users' table-based cache for user 'Bob':") + print(result) + self.assertEqual(len(result), 1, "Should return exactly 1 row for Bob.") + self.assertEqual(result.iloc[0]['name'], 'Bob', "The name should be Bob.") - # Step 10: Check if 'user_billy' still exists as it should not expire - print("Checking if 'user_billy' still exists in the cache (it should not have expired).") - user_billy_item = cached_users.get_item('user_billy') - print(f"'user_billy' still exists: {user_billy_item}") - assert user_billy_item == "{password:'BillyPass'}", "user_billy should still exist in the cache." + # Step 5: Wait for 3 seconds (after expiration time) and query again to check expiration + print("\n5. 
Waiting for 3 seconds to allow the cache to expire...") + time.sleep(3) + result_after_expiry = table_cache.query([('name', 'Bob')]) + print("\n5. After 3 seconds, queried again for Bob. Result should be empty due to expiration:") + print(result_after_expiry) + self.assertTrue(result_after_expiry.empty, "Result should be empty as Bob's entry has expired.") - def test_DataCacheBase(self): - # Step 1: Create a DataCacheBase instance - print("Creating a DataCacheBase instance.") - cache_manager = DataCacheBase() + # Step 6: Create a Row-Based Cache for candles + cache_manager.create_cache(name="candles", cache_type="row", + default_expiration=dt.timedelta(seconds=5), size_limit=10) + row_cache = cache_manager.get_cache("candles") + print("\n6. Created a Row-Based Cache for 'candles' with expiration set to 5 seconds.") - # Step 2: Set some items in 'my_cache'. The cache is created automatically with limit 2 and 'evict' policy. - print("Setting 'key1' in 'my_cache' with an expiration of 10 seconds.") - cache_manager.set_cache_item('key1', 'data1', expire_delta=dt.timedelta(seconds=10), cache_name='my_cache', - limit=2, eviction_policy='evict') + # Step 7: Add candle data to the row-based cache + candle_data_1 = pd.DataFrame([ + {'time': '2024-09-11 00:00', 'open': 100, 'high': 105, 'low': 99, 'close': 102}, + {'time': '2024-09-11 01:00', 'open': 101, 'high': 106, 'low': 100, 'close': 103} + ]) + row_cache.add_entry("candle_1", candle_data_1) + print("\n7. Added candle data to 'candles' row-based cache under key 'candle_1':") + print(candle_data_1) - print("Setting 'key2' in 'my_cache' with an expiration of 20 seconds.") - cache_manager.set_cache_item('key2', 'data2', expire_delta=dt.timedelta(seconds=20), cache_name='my_cache') + # Step 8: Query the row-based cache to retrieve specific time entry for candle_1 + result_candle = row_cache.query([("key", "candle_1"), ("time", "2024-09-11 00:00")]) + print("\n8. 
Queried 'candles' row-based cache for 'candle_1' and time '2024-09-11 00:00':") + print(result_candle) + self.assertEqual(len(result_candle), 1, "Should return exactly 1 row for the specified time.") + self.assertEqual(result_candle.iloc[0]['time'], '2024-09-11 00:00', "The time should match the queried time.") - # Step 3: Set some items in 'second_cache'. The cache is created automatically with limit 3 and 'deny' policy. - print("Setting 'keyA' in 'second_cache' with an expiration of 15 seconds.") - cache_manager.set_cache_item('keyA', 'dataA', expire_delta=dt.timedelta(seconds=15), cache_name='second_cache', - limit=3, eviction_policy='deny') + # Step 9: Wait for 6 seconds (after expiration time) and query again to check expiration + print("\n9. Waiting for 6 seconds to allow the 'candle_1' cache to expire...") + time.sleep(6) + result_candle_after_expiry = row_cache.query([("key", "candle_1"), ("time", "2024-09-11 00:00")]) + print("\n9. After 6 seconds, queried again for 'candle_1'. Result should be empty due to expiration:") + print(result_candle_after_expiry) + self.assertTrue(result_candle_after_expiry.empty, "Result should be empty as 'candle_1' has expired.") - print("Setting 'keyB' in 'second_cache' with an expiration of 30 seconds.") - cache_manager.set_cache_item('keyB', 'dataB', expire_delta=dt.timedelta(seconds=30), cache_name='second_cache') + # Step 10: Test the size limit of the row-based cache (adding more than limit) + print("\n10. Testing row-based cache size limit (max 10 entries).") + for i in range(1, 12): + row_cache.add_entry(f"candle_{i}", pd.DataFrame([ + {'time': f'2024-09-11 00:00', 'open': 100 + i, 'high': 105 + i, 'low': 99 + i, 'close': 102 + i} + ])) + print(f"Added entry: candle_{i}") - print("Setting 'keyC' in 'second_cache' with no expiration.") - cache_manager.set_cache_item('keyC', 'dataC', cache_name='second_cache') + print("\n11. 
Checking the size of the cache after adding 11 entries (limit is 10):") + result = row_cache.get_entry("candle_1") + print(f"Checking 'candle_1': {result}") + self.assertIsNone(result, "'candle_1' should have been evicted as the size limit is 10.") - # Step 4: Add another item to 'my_cache', causing the oldest item to be evicted. - print("Adding 'key3' to 'my_cache', which should evict 'key1' due to the limit.") - cache_manager.set_cache_item('key3', 'data3', cache_name='my_cache') + # Final print statement for clarity of test ending + print("\n---- Cache System Test Completed ----") - # Step 5: Attempt to retrieve the evicted item 'key1' from 'my_cache'. - print("Attempting to retrieve the evicted item 'key1' from 'my_cache'.") - evicted_item = cache_manager.get_cache_item('key1', cache_name='my_cache') - print(f"Evicted Item from 'my_cache': {evicted_item}") - assert evicted_item is None, "'key1' should have been evicted from 'my_cache'." + def test_cache_system_advanced_usage(self): + print("\n---- Starting Advanced Cache System Test ----") - # Step 6: Retrieve all current items in both caches before cleaning. - print("Retrieving all current items in 'my_cache' before cleaning.") - all_items_my_cache = cache_manager.get_all_cache_items('my_cache') - print("Current items in 'my_cache':\n", all_items_my_cache) + # Step 1: Create CacheManager instance + cache_manager = CacheManager() + print("\n1. Created CacheManager instance.") - print("Retrieving all current items in 'second_cache' before cleaning.") - all_items_second_cache = cache_manager.get_all_cache_items('second_cache') - print("Current items in 'second_cache':\n", all_items_second_cache) + # Row-Based Cache Test with Different Data Types + cache_manager.create_cache(name="row_cache", cache_type="row", + default_expiration=dt.timedelta(seconds=5), size_limit=10) + row_cache = cache_manager.get_cache("row_cache") + print("\n2. 
Created a Row-Based Cache with expiration set to 5 seconds.") - # Step 7: Simulate time passing to expire 'key2' in 'my_cache' and 'keyA' in 'second_cache'. - print("Simulating time passing to expire 'key2' in 'my_cache' (20 seconds)" - " and 'keyA' in 'second_cache' (15 seconds).") - time.sleep(20) # Simulate the passage of time; in real tests, you may mock datetime. + # Step 2: Add different types of data into Row-Based Cache + # Add a string + row_cache.add_entry("message", "Hello, World!") + print("\n3. Added a string to Row-Based Cache under key 'message'.") - # Step 8: Clean expired items in all caches - print("Cleaning expired items in all caches.") - cache_manager.clean_expired_items() + # Add a dictionary + row_cache.add_entry("user_profile", {"name": "Charlie", "age": 28, "email": "charlie@example.com"}) + print("\n4. Added a dictionary to Row-Based Cache under key 'user_profile'.") - # Step 9: Verify the cleaning of expired items in 'my_cache'. - print("Retrieving all current items in 'my_cache' after cleaning expired items.") - all_items_after_cleaning_my_cache = cache_manager.get_all_cache_items('my_cache') - print("Items in 'my_cache' after cleaning:\n", all_items_after_cleaning_my_cache) - assert 'key2' not in all_items_after_cleaning_my_cache[ - 'key'].values, "'key2' should have been expired and removed from 'my_cache'." - assert 'key3' in all_items_after_cleaning_my_cache['key'].values, "'key3' should still be in 'my_cache'." + # Add a list of numbers + row_cache.add_entry("numbers", [1, 2, 3, 4, 5]) + print("\n5. Added a list of numbers to Row-Based Cache under key 'numbers'.") - # Step 10: Verify the cleaning of expired items in 'second_cache'. 
- print("Retrieving all current items in 'second_cache' after cleaning expired items.") - all_items_after_cleaning_second_cache = cache_manager.get_all_cache_items('second_cache') - print("Items in 'second_cache' after cleaning:\n", all_items_after_cleaning_second_cache) - assert 'keyA' not in all_items_after_cleaning_second_cache[ - 'key'].values, "'keyA' should have been expired and removed from 'second_cache'." - assert 'keyB' in all_items_after_cleaning_second_cache[ - 'key'].values, "'keyB' should still be in 'second_cache'." - assert 'keyC' in all_items_after_cleaning_second_cache[ - 'key'].values, "'keyC' should still be in 'second_cache' since it has no expiration." + # Step 3: Query the Row-Based Cache + print("\n6. Querying Row-Based Cache for different types of data:") + result_message = row_cache.query([("key", "message")]) + print(f"Query result for key 'message': {result_message}") - def test_SnapshotDataCache(self): - # Step 1: Create a SnapshotDataCache instance - print("Creating a SnapshotDataCache instance.") - snapshot_cache_manager = SnapshotDataCache() + result_profile = row_cache.query([("key", "user_profile")]) + print(f"Query result for key 'user_profile': {result_profile}") - # Step 2: Create an in-memory cache with a limit of 2 items and 'evict' policy - print("Creating an in-memory cache named 'my_cache' with a limit of 2 items and 'evict' policy.") - snapshot_cache_manager.create_cache('my_cache', cache_type=InMemoryCache, limit=2, eviction_policy='evict') + result_numbers = row_cache.query([("key", "numbers")]) + print(f"Query result for key 'numbers': {result_numbers}") - # Step 3: Set some items in the cache - print("Setting 'key1' in 'my_cache' with an expiration of 10 seconds.") - snapshot_cache_manager.set_cache_item(key='key1', data='data1', expire_delta=dt.timedelta(seconds=10), - cache_name='my_cache') + # Assert non-expired entries + self.assertEqual(result_message.iloc[0][0], "Hello, World!", "Message should be 'Hello, 
World!'") + self.assertEqual(result_profile.iloc[0]['name'], 'Charlie', "User profile should have name 'Charlie'") - print("Setting 'key2' in 'my_cache' with an expiration of 20 seconds.") - snapshot_cache_manager.set_cache_item(key='key2', data='data2', expire_delta=dt.timedelta(seconds=20), - cache_name='my_cache') + # Convert the DataFrame row back to a list and assert the values match + numbers_list = result_numbers.iloc[0].tolist() + self.assertEqual(numbers_list, [1, 2, 3, 4, 5], "Should return list of numbers.") - # Step 4: Take a snapshot of the current state of 'my_cache' - print("Taking a snapshot of the current state of 'my_cache'.") - snapshot_cache_manager.snapshot_cache('my_cache') + # Table-Based Cache Test with DataFrames + cache_manager.create_cache(name="table_cache", cache_type="table", + default_expiration=dt.timedelta(seconds=5), size_limit=5) + table_cache = cache_manager.get_cache("table_cache") + print("\n7. Created a Table-Based Cache with expiration set to 5 seconds.") - # Step 5: Add another item, causing the oldest item to be evicted - print("Adding 'key3' to 'my_cache', which should evict 'key1' due to the limit.") - snapshot_cache_manager.set_cache_item(key='key3', data='data3', cache_name='my_cache') + # Step 4: Add a DataFrame with mixed data types to Table-Based Cache + mixed_df = pd.DataFrame([ + {'category': 'A', 'value': 100, 'timestamp': '2024-09-12 12:00'}, + {'category': 'B', 'value': 200, 'timestamp': '2024-09-12 13:00'}, + {'category': 'A', 'value': 150, 'timestamp': '2024-09-12 14:00'} + ]) + table_cache.add_table(mixed_df) + print("\n8. Added mixed DataFrame to Table-Based Cache:") + print(mixed_df) - # Step 6: Retrieve the most recent snapshot of 'my_cache' - print("Retrieving the most recent snapshot of 'my_cache'.") - snapshot = snapshot_cache_manager.get_snapshot('my_cache') - print(f"Snapshot Data:\n{snapshot}") + # Step 5: Query the Table-Based Cache + print("\n9. 
Querying Table-Based Cache for category 'A':") + result_category_a = table_cache.query([("category", "A")]) + print(result_category_a) + self.assertEqual(len(result_category_a), 2, "There should be 2 rows with category 'A'.") - # Assert that the snapshot contains 'key1' and 'key2', but not 'key3' - assert 'key1' in snapshot['key'].values, "'key1' should be in the snapshot." - assert 'key2' in snapshot['key'].values, "'key2' should be in the snapshot." - assert 'key3' not in snapshot[ - 'key'].values, "'key3' should not be in the snapshot as it was added after the snapshot." + print("\n10. Querying Table-Based Cache for value greater than 100:") + result_value_gt_100 = table_cache.query([("value", 150)]) + print(result_value_gt_100) + self.assertEqual(len(result_value_gt_100), 1, "There should be 1 row with value of 150.") - # Step 7: List all available snapshots with their timestamps - print("Listing all available snapshots with their timestamps.") - snapshots_list = snapshot_cache_manager.list_snapshots() - print(f"Snapshots List: {snapshots_list}") + # Step 6: Wait for entries to expire and query again + print("\n11. Waiting for 6 seconds to let all rows expire...") + time.sleep(6) + result_after_expiry = table_cache.query([("category", "A")]) + print(f"\n12. After 6 seconds, querying again for category 'A'. Result should be empty:") + print(result_after_expiry) + self.assertTrue(result_after_expiry.empty, "Result should be empty due to expiration.") - # Assert that the snapshot list contains 'my_cache' - assert 'my_cache' in snapshots_list, "'my_cache' should be in the snapshots list." - assert isinstance(snapshots_list['my_cache'], str), "The snapshot for 'my_cache' should have a timestamp." 
+ # Final print statement for clarity of test ending + print("\n---- Advanced Cache System Test Completed ----") - # Additional validation: Ensure 'key3' is present in the live cache but not in the snapshot - print("Ensuring 'key3' is present in the live 'my_cache'.") - live_cache_items = snapshot_cache_manager.get_all_cache_items('my_cache') - print(f"Live 'my_cache' items after adding 'key3':\n{live_cache_items}") - assert 'key3' in live_cache_items['key'].values, "'key3' should be in the live cache." + def test_cache_system_edge_cases(self): + print("\n---- Starting Edge Case Cache System Test ----") - # Ensure the live cache does not contain 'key1' - assert 'key1' not in live_cache_items['key'].values, "'key1' should have been evicted from the live cache." + # Step 1: Create CacheManager instance + cache_manager = CacheManager() + print("\n1. Created CacheManager instance.") + + # Test 1: Cache Size Limit (Row-Based Cache) + cache_manager.create_cache(name="limited_row_cache", cache_type="row", size_limit=3, eviction_policy="evict") + limited_row_cache = cache_manager.get_cache("limited_row_cache") + print("\n2. 
Created a Row-Based Cache with size limit of 3 and eviction policy 'evict'.") + + # Add entries beyond the size limit and check eviction + limited_row_cache.add_entry("item1", "Data 1") + print(f"Cache after adding item1: {limited_row_cache.cache}") + + limited_row_cache.add_entry("item2", "Data 2") + print(f"Cache after adding item2: {limited_row_cache.cache}") + + limited_row_cache.add_entry("item3", "Data 3") + print(f"Cache after adding item3: {limited_row_cache.cache}") + + # Add 4th entry, which should cause eviction of the first entry + limited_row_cache.add_entry("item4", "Data 4") + print(f"Cache after adding item4: {limited_row_cache.cache}") + + # Verify eviction of the oldest entry (item1) + result_item1 = limited_row_cache.query([("key", "item1")]) + print(f"Query result for 'item1' (should be evicted): {result_item1}") + self.assertTrue(result_item1.empty, "'item1' should be evicted.") + + # Verify that other items exist + result_item4 = limited_row_cache.query([("key", "item4")]) + print(f"Query result for 'item4': {result_item4}") + self.assertFalse(result_item4.empty, "'item4' should be present.") + + def test_access_counter_and_purging(self): + """Test access counter and purging mechanism.""" + print("\n---- Starting Access Counter and Purge Mechanism Test ----") + + # Step 1: Create Row-Based Cache with a purge threshold of 5 accesses + cache = RowBasedCache(default_expiration=1, purge_threshold=5) + print("\n1. Created a Row-Based Cache with purge threshold of 5 accesses.") + + # Step 2: Add entries with expiration times + cache.add_entry("item1", "Data 1") + cache.add_entry("item2", "Data 2") + print("\n2. Added 2 entries with a 1-second expiration time.") + + # Step 3: Access cache 5 times to trigger purge + for i in range(5): + cache.get_entry("item1") + print(f"\n3. Accessed cache {i+1} times.") + + # Step 4: Ensure item1 is still in cache (it hasn't expired yet because of timing) + result_item1 = cache.get_entry("item1") + print(f"\n4. 
Retrieved 'item1' from cache before expiration: {result_item1}") + self.assertIsNotNone(result_item1, "'item1' should still be in cache.") + + # Step 5: Wait for expiration and access again to trigger purge + time.sleep(2) + result_item1_expired = cache.get_entry("item1") + print(f"\n5. Retrieved 'item1' after expiration (should be None): {result_item1_expired}") + self.assertIsNone(result_item1_expired, "'item1' should have expired.") + + # Step 6: Access cache 5 more times to confirm expired entries are purged + for i in range(5): + cache.get_entry("item2") + print(f"\n6. Accessed cache {i+1} more times to trigger another purge.") + + # Verify item2 is also expired after accesses + result_item2_expired = cache.get_entry("item2") + print(f"\n7. Retrieved 'item2' after expiration (should be None): {result_item2_expired}") + self.assertIsNone(result_item2_expired, "'item2' should have expired.") + + def test_is_attr_taken_in_row_cache(self): + """Test the is_attr_taken method in the Row-Based Cache.""" + print("\n---- Starting is_attr_taken in Row-Based Cache Test ----") + + # Step 1: Create a Row-Based Cache + cache = RowBasedCache() + print("\n1. Created a Row-Based Cache.") + + # Step 2: Add entries with DataFrames + df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 25]}) + cache.add_entry("users", df) + print("\n2. Added a DataFrame to the cache under key 'users'.") + + # Step 3: Test is_attr_taken + attr_taken = cache.is_attr_taken('name', 'Alice') + print(f"\n3. Checked if 'name' column contains 'Alice': {attr_taken}") + self.assertTrue(attr_taken, "'name' column should contain 'Alice'.") + + attr_not_taken = cache.is_attr_taken('name', 'Charlie') + print(f"\n4. 
Checked if 'name' column contains 'Charlie': {attr_not_taken}") + self.assertFalse(attr_not_taken, "'name' column should not contain 'Charlie'.") + + def test_is_attr_taken_in_table_cache(self): + """Test the is_attr_taken method in the Table-Based Cache.""" + print("\n---- Starting is_attr_taken in Table-Based Cache Test ----") + + # Step 1: Create a Table-Based Cache + cache = TableBasedCache() + print("\n1. Created a Table-Based Cache.") + + # Step 2: Add a DataFrame to the Table-Based Cache + df = pd.DataFrame({'name': ['Alice', 'Charlie'], 'age': [30, 40]}) + cache.add_table(df) + print("\n2. Added a DataFrame to the Table-Based Cache.") + + # Step 3: Test is_attr_taken + attr_taken = cache.is_attr_taken('name', 'Alice') + print(f"\n3. Checked if 'name' column contains 'Alice': {attr_taken}") + self.assertTrue(attr_taken, "'name' column should contain 'Alice'.") + + attr_not_taken = cache.is_attr_taken('name', 'Bob') + print(f"\n4. Checked if 'name' column contains 'Bob': {attr_not_taken}") + self.assertFalse(attr_not_taken, "'name' column should not contain 'Bob'.") + + def test_expired_entry_handling(self): + """Test that expired entries are not returned when querying.""" + print("\n---- Starting Expired Entry Handling Test ----") + + # Step 1: Create a Row-Based Cache with a 1-second expiration + cache = RowBasedCache(default_expiration=1) + print("\n1. Created a Row-Based Cache with a 1-second expiration.") + + # Step 2: Add an entry + cache.add_entry("item1", "Temporary Data") + print("\n2. Added an entry 'item1' with a 1-second expiration.") + + # Step 3: Wait for expiration + time.sleep(2) + + # Step 4: Query the expired entry + result = cache.get_entry("item1") + print(f"\n4. 
Queried 'item1' after expiration: {result}") + self.assertIsNone(result, "'item1' should have expired and not be returned.") + + def test_remove_item_with_conditions_row_cache(self): + """Test remove_item method in Row-Based Cache with conditions.""" + print("\n---- Starting remove_item with conditions in Row-Based Cache Test ----") + + # Step 1: Create a Row-Based Cache + cache = RowBasedCache() + print("\n1. Created a Row-Based Cache.") + + # Step 2: Add a DataFrame to the cache under 'users' + df = pd.DataFrame({'name': ['Alice', 'Bob', 'Charlie'], 'age': [30, 25, 35]}) + cache.add_entry("users", df) + print("\n2. Added a DataFrame to the cache under key 'users'.") + print(cache.get_entry("users")) + + # Step 3: Remove a specific row (where 'name' == 'Alice') from the DataFrame + removed = cache.remove_item([('key', 'users'), ('name', 'Alice')]) + print(f"\n3. Removed 'Alice' from the DataFrame: {removed}") + self.assertTrue(removed, "'Alice' should be removed from the DataFrame.") + + # Verify that 'Alice' was removed + remaining_data = cache.get_entry('users') + print(f"\n4. Remaining data in 'users': \n{remaining_data}") + self.assertNotIn('Alice', remaining_data['name'].values, "'Alice' should no longer be in the DataFrame.") + self.assertIn('Bob', remaining_data['name'].values, "'Bob' should still be in the DataFrame.") + + # Step 4: Remove the last remaining row (where 'name' == 'Charlie') + removed = cache.remove_item([('key', 'users'), ('name', 'Charlie')]) + print(f"\n5. Removed 'Charlie' from the DataFrame: {removed}") + remaining_data = cache.get_entry('users') + print(f"\n6. Remaining data in 'users' after 'Charlie' removal: \n{remaining_data}") + self.assertNotIn('Charlie', remaining_data['name'].values, "'Charlie' should no longer be in the DataFrame.") + + # Step 5: Remove the last row (where 'name' == 'Bob'), this should remove the entire entry + removed = cache.remove_item([('key', 'users'), ('name', 'Bob')]) + print(f"\n7. 
Removed 'Bob' from the DataFrame: {removed}") + remaining_data = cache.get_entry('users') + print(f"\n8. Remaining data in 'users' after removing 'Bob' (should be None): {remaining_data}") + self.assertIsNone(remaining_data, "'users' entry should no longer exist in the cache.") + + def test_remove_item_with_conditions_table_cache(self): + """Test remove_item method in Table-Based Cache with conditions.""" + print("\n---- Starting remove_item with conditions in Table-Based Cache Test ----") + + # Step 1: Create a Table-Based Cache + cache = TableBasedCache() + print("\n1. Created a Table-Based Cache.") + + # Step 2: Add a DataFrame to the cache + df = pd.DataFrame({'name': ['Alice', 'Bob', 'Charlie'], 'age': [30, 25, 35]}) + cache.add_table(df) + print("\n2. Added a DataFrame to the Table-Based Cache.") + print(cache.get_all_items()) + + # Step 3: Remove a specific row (where 'name' == 'Alice') + removed = cache.remove_item([('name', 'Alice')]) + print(f"\n3. Removed 'Alice' from the table: {removed}") + self.assertTrue(removed, "'Alice' should be removed from the table-based cache.") + + # Verify that 'Alice' was removed + remaining_data = cache.get_all_items() + print(f"\n4. Remaining data in the cache: \n{remaining_data}") + self.assertNotIn('Alice', remaining_data['name'].values, "'Alice' should no longer be in the table.") + self.assertIn('Bob', remaining_data['name'].values, "'Bob' should still be in the table.") + + # Step 4: Remove another row (where 'name' == 'Charlie') + removed = cache.remove_item([('name', 'Charlie')]) + print(f"\n5. Removed 'Charlie' from the table: {removed}") + remaining_data = cache.get_all_items() + print(f"\n6. Remaining data in the cache after removing 'Charlie': \n{remaining_data}") + self.assertNotIn('Charlie', remaining_data['name'].values, "'Charlie' should no longer be in the table.") + + # Step 5: Remove the last row (where 'name' == 'Bob') + removed = cache.remove_item([('name', 'Bob')]) + print(f"\n7. 
Removed 'Bob' from the table: {removed}") + remaining_data = cache.get_all_items() + print(f"\n8. Remaining data in the cache after removing 'Bob' (should be empty): \n{remaining_data}") + self.assertTrue(remaining_data.empty, "The table should be empty after removing all rows.") + + def test_remove_item_with_conditions_market_data(self): + """Test remove_item method in Row-Based Cache with market OHLC data.""" + print("\n---- Starting remove_item with market OHLC data in Row-Based Cache Test ----") + + # Step 1: Create a Row-Based Cache for market data + cache = RowBasedCache() + print("\n1. Created a Row-Based Cache for market data.") + + # Step 2: Add OHLC data for 'BTC' and 'ETH' + btc_data = pd.DataFrame({ + 'timestamp': ['2024-09-10 12:00', '2024-09-10 12:05', '2024-09-10 12:10'], + 'open': [30000, 30100, 30200], + 'high': [30500, 30600, 30700], + 'low': [29900, 30050, 30150], + 'close': [30400, 30550, 30650] + }) + eth_data = pd.DataFrame({ + 'timestamp': ['2024-09-10 12:00', '2024-09-10 12:05', '2024-09-10 12:10'], + 'open': [2000, 2010, 2020], + 'high': [2050, 2060, 2070], + 'low': [1990, 2005, 2015], + 'close': [2040, 2055, 2065] + }) + cache.add_entry("BTC", btc_data) + cache.add_entry("ETH", eth_data) + print("\n2. Added OHLC data for 'BTC' and 'ETH'.") + print(f"BTC Data:\n{cache.get_entry('BTC')}") + print(f"ETH Data:\n{cache.get_entry('ETH')}") + + # Step 3: Remove a specific row from 'BTC' data where timestamp == '2024-09-10 12:05' + removed = cache.remove_item([('key', 'BTC'), ('timestamp', '2024-09-10 12:05')]) + print(f"\n3. Removed '2024-09-10 12:05' row from 'BTC' data: {removed}") + self.assertTrue(removed, "'2024-09-10 12:05' should be removed from the 'BTC' data.") + + # Verify that the timestamp was removed from 'BTC' data + remaining_btc = cache.get_entry('BTC') + print(f"\n4. 
Remaining BTC data after removal:\n{remaining_btc}") + self.assertNotIn('2024-09-10 12:05', remaining_btc['timestamp'].values, + "'2024-09-10 12:05' should no longer be in the 'BTC' data.") + + # Step 4: Remove entire 'ETH' data entry + removed_eth = cache.remove_item([('key', 'ETH')]) + print(f"\n5. Removed entire 'ETH' data entry: {removed_eth}") + self.assertTrue(removed_eth, "'ETH' data should be removed from the cache.") + + # Verify that 'ETH' was completely removed from the cache + remaining_eth = cache.get_entry('ETH') + print(f"\n6. Remaining ETH data after removal (should be None): {remaining_eth}") + self.assertIsNone(remaining_eth, "'ETH' entry should no longer exist in the cache.") + + def test_remove_item_with_conditions_trade_stats(self): + """Test remove_item method in Row-Based Cache with trade statistics data.""" + print("\n---- Starting remove_item with trade statistics in Row-Based Cache Test ----") + + # Step 1: Create a Row-Based Cache for trade statistics + cache = RowBasedCache() + print("\n1. Created a Row-Based Cache for trade statistics.") + + # Step 2: Add trade statistics for 'strategy_1' and 'strategy_2' + strategy_1_data = pd.DataFrame({ + 'date': ['2024-09-10', '2024-09-11', '2024-09-12'], + 'success_rate': [80, 85, 75], + 'trades': [10, 12, 8] + }) + strategy_2_data = pd.DataFrame({ + 'date': ['2024-09-10', '2024-09-11', '2024-09-12'], + 'success_rate': [60, 70, 65], + 'trades': [15, 17, 14] + }) + cache.add_entry("strategy_1", strategy_1_data) + cache.add_entry("strategy_2", strategy_2_data) + print("\n2. Added trade statistics for 'strategy_1' and 'strategy_2'.") + print(f"Strategy 1 Data:\n{cache.get_entry('strategy_1')}") + print(f"Strategy 2 Data:\n{cache.get_entry('strategy_2')}") + + # Step 3: Remove a specific row from 'strategy_1' where date == '2024-09-11' + removed = cache.remove_item([('key', 'strategy_1'), ('date', '2024-09-11')]) + print(f"\n3. 
Removed '2024-09-11' row from 'strategy_1' data: {removed}") + self.assertTrue(removed, "'2024-09-11' should be removed from the 'strategy_1' data.") + + # Verify that the date was removed from 'strategy_1' data + remaining_strategy_1 = cache.get_entry('strategy_1') + print(f"\n4. Remaining strategy_1 data after removal:\n{remaining_strategy_1}") + self.assertNotIn('2024-09-11', remaining_strategy_1['date'].values, + "'2024-09-11' should no longer be in the 'strategy_1' data.") + + # Step 4: Remove entire 'strategy_2' data entry + removed_strategy_2 = cache.remove_item([('key', 'strategy_2')]) + print(f"\n5. Removed entire 'strategy_2' data entry: {removed_strategy_2}") + self.assertTrue(removed_strategy_2, "'strategy_2' data should be removed from the cache.") + + # Verify that 'strategy_2' was completely removed from the cache + remaining_strategy_2 = cache.get_entry('strategy_2') + print(f"\n6. Remaining strategy_2 data after removal (should be None): {remaining_strategy_2}") + self.assertIsNone(remaining_strategy_2, "'strategy_2' entry should no longer exist in the cache.") + + def test_remove_item_with_other_data_types(self): + """Test remove_item method in Row-Based Cache with different data types.""" + print("\n---- Starting remove_item with different data types in Row-Based Cache Test ----") + + # Step 1: Create a Row-Based Cache for mixed data types + cache = RowBasedCache() + print("\n1. Created a Row-Based Cache for mixed data types.") + + # Step 2: Add entries with different data types + + # String + cache.add_entry("message", "Hello, World!") + print("\n2. Added a string 'Hello, World!' under key 'message'.") + + # Dictionary + cache.add_entry("user_profile", {"name": "Alice", "age": 30, "email": "alice@example.com"}) + print("\n3. Added a dictionary under key 'user_profile'.") + + # List + cache.add_entry("numbers", [1, 2, 3, 4, 5]) + print("\n4. Added a list of numbers under key 'numbers'.") + + # Integer + cache.add_entry("count", 42) + print("\n5. 
Added an integer '42' under key 'count'.") + + # Step 3: Remove specific entries based on key + + # Remove string entry + removed_message = cache.remove_item([('key', 'message')]) + print(f"\n6. Removed string entry: {removed_message}") + self.assertTrue(removed_message, "'message' should be removed from the cache.") + self.assertIsNone(cache.get_entry('message'), "'message' entry should no longer exist.") + + # Remove dictionary entry + removed_user_profile = cache.remove_item([('key', 'user_profile')]) + print(f"\n7. Removed dictionary entry: {removed_user_profile}") + self.assertTrue(removed_user_profile, "'user_profile' should be removed from the cache.") + self.assertIsNone(cache.get_entry('user_profile'), "'user_profile' entry should no longer exist.") + + # Remove list entry + removed_numbers = cache.remove_item([('key', 'numbers')]) + print(f"\n8. Removed list entry: {removed_numbers}") + self.assertTrue(removed_numbers, "'numbers' should be removed from the cache.") + self.assertIsNone(cache.get_entry('numbers'), "'numbers' entry should no longer exist.") + + # Remove integer entry + removed_count = cache.remove_item([('key', 'count')]) + print(f"\n9. Removed integer entry: {removed_count}") + self.assertTrue(removed_count, "'count' should be removed from the cache.") + self.assertIsNone(cache.get_entry('count'), "'count' entry should no longer exist.") + + def test_snapshot_row_based_cache(self): + """Test snapshot functionality with row-based cache.""" + print("\n---- Starting Snapshot Test with Row-Based Cache ----") + + # Step 1: Create an instance of SnapshotDataCache + snapshot_cache = SnapshotDataCache() + print("\n1. 
Created SnapshotDataCache instance.") + + # Step 2: Create a row-based cache and add data + snapshot_cache.create_cache(name="market_data", cache_type="row") + market_data = pd.DataFrame({ + 'timestamp': ['2024-09-10 12:00', '2024-09-10 12:05'], + 'open': [30000, 30100], + 'high': [30500, 30600], + 'low': [29900, 30050], + 'close': [30400, 30550] + }) + snapshot_cache.get_cache("market_data").add_entry("BTC", market_data) + print("\n2. Added 'BTC' market data to row-based cache.") + + # Step 3: Take a snapshot of the row-based cache + snapshot_cache.snapshot_cache("market_data") + snapshot_list = snapshot_cache.list_snapshots() + print(f"\n3. Snapshot list after taking snapshot: {snapshot_list}") + self.assertIn("market_data", snapshot_list, "Snapshot for 'market_data' should be present.") + + # Step 4: Retrieve the snapshot and verify its contents + snapshot = snapshot_cache.get_snapshot("market_data") + print(f"\n4. Retrieved snapshot of 'market_data':\n{snapshot.get_entry('BTC')}") + pd.testing.assert_frame_equal(snapshot.get_entry('BTC'), market_data) + + # Step 5: Add more data to the live cache and verify the snapshot is unchanged + additional_data = pd.DataFrame({ + 'timestamp': ['2024-09-10 12:10'], + 'open': [30200], + 'high': [30700], + 'low': [30150], + 'close': [30650] + }) + snapshot_cache.get_cache("market_data").add_entry("BTC", additional_data) + print("\n5. Added additional data to the live 'BTC' cache.") + + # Verify live cache has updated but the snapshot remains unchanged + live_data = snapshot_cache.get_cache("market_data").get_entry("BTC") + print(f"\n6. Live 'BTC' cache data:\n{live_data}") + self.assertEqual(len(live_data), 3, "Live cache should have 3 rows after adding more data.") + + # Ensure the snapshot still has the original data + snapshot_data = snapshot_cache.get_snapshot("market_data").get_entry("BTC") + print(f"\n7. 
Snapshot data (should still be original):\n{snapshot_data}") + self.assertEqual(len(snapshot_data), 2, "Snapshot should still have the original 2 rows.") + + def test_snapshot_table_based_cache_with_overwrite_column(self): + """Test snapshot functionality with table-based cache and overwrite by column.""" + print("\n---- Starting Snapshot Test with Table-Based Cache ----") + + # Step 1: Create an instance of SnapshotDataCache + snapshot_cache = SnapshotDataCache() + print("\n1. Created SnapshotDataCache instance.") + + # Step 2: Create a table-based cache and add initial data + user_data = pd.DataFrame({ + 'name': ['Alice', 'Bob'], + 'email': ['alice@example.com', 'bob@example.com'], + 'age': [30, 25] + }) + snapshot_cache.create_cache(name="user_data", cache_type="table") + snapshot_cache.get_cache("user_data").add_table(user_data) + print("\n2. Added user data to table-based cache.") + + # Step 3: Take a snapshot of the table-based cache + snapshot_cache.snapshot_cache("user_data") + snapshot_list = snapshot_cache.list_snapshots() + print(f"\n3. Snapshot list after taking snapshot: {snapshot_list}") + self.assertIn("user_data", snapshot_list, "Snapshot for 'user_data' should be present.") + + # Step 4: Retrieve the snapshot and verify its contents (excluding metadata) + snapshot = snapshot_cache.get_snapshot("user_data") + snapshot_data = snapshot.get_all_items().drop(columns=['metadata']) + print(f"\n4. Retrieved snapshot of 'user_data' (without metadata):\n{snapshot_data}") + pd.testing.assert_frame_equal(snapshot_data, user_data) + + # Step 5: Modify the live cache and overwrite specific rows by 'name' + updated_user_data = pd.DataFrame({ + 'name': ['Alice', 'Bob', 'Charlie'], + 'email': ['alice@example.com', 'bob@example.com', 'charlie@example.com'], + 'age': [35, 25, 40] + }) + snapshot_cache.get_cache("user_data").add_table(updated_user_data, overwrite='name') + print("\n5. 
Updated live table by overwriting rows based on 'name'.") + + # Verify live cache has updated but the snapshot remains unchanged + live_data = snapshot_cache.get_cache("user_data").get_all_items().drop(columns=['metadata']) + print(f"\n6. Live user_data table (without metadata):\n{live_data}") + self.assertEqual(len(live_data), 3, + "Live cache should have 3 rows after adding 'Charlie' and overwriting 'Alice'.") + + # Ensure the snapshot still has the original data + snapshot_data = snapshot_cache.get_snapshot("user_data").get_all_items().drop(columns=['metadata']) + print(f"\n7. Snapshot data (should still be original, without metadata):\n{snapshot_data}") + self.assertEqual(len(snapshot_data), 2, "Snapshot should still have the original 2 rows.") def test_update_candle_cache(self): + self.load_prerequisites() + print('Testing update_candle_cache() method:') + # Set a cache key + candle_cache_key = f'{self.ex_details[0]}_{self.ex_details[1]}_{self.ex_details[2]}' + # Initialize the DataGenerator with the 5-minute timeframe data_gen = DataGenerator('5m') # Create initial DataFrame and insert it into the cache df_initial = data_gen.create_table(num_rec=3, start=dt.datetime(2024, 8, 9, 0, 0, 0, tzinfo=dt.timezone.utc)) + print(f'Inserting this table into cache:\n{df_initial}\n') - self.data.set_cache_item(key=self.key, data=df_initial, cache_name='candles') + self.data.set_cache_item(key=candle_cache_key, data=df_initial, cache_name='candles') # Create new DataFrame to be added to the cache df_new = data_gen.create_table(num_rec=3, start=dt.datetime(2024, 8, 9, 0, 15, 0, tzinfo=dt.timezone.utc)) + print(f'Updating cache with this table:\n{df_new}\n') - self.data._update_candle_cache(more_records=df_new, key=self.key) + self.data._update_candle_cache(more_records=df_new, key=candle_cache_key) # Retrieve the resulting DataFrame from the cache - result = self.data.get_cache_item(key=self.key, cache_name='candles') + result = 
self.data.get_cache_item(key=candle_cache_key, cache_name='candles') print(f'The resulting table in cache is:\n{result}\n') # Create the expected DataFrame @@ -487,27 +970,75 @@ class TestDataCache(unittest.TestCase): print(f'The result time values match:\n{result["time"].tolist()}\n') print(' - Update cache with new records passed.') - def test_update_cached_dict(self): - print('Testing update_cached_dict() method:') + def analyze_cache_update(self, existing_records, more_records): + print("\n### Initial Data ###") + print("Existing Records:") + print(existing_records) + print("\nMore Records:") + print(more_records) - # Step 1: Set an empty dictionary in the cache for the specified key - print(f'Setting an empty dictionary in the cache with key: {self.key}') - self.data.set_cache_item(data={}, key=self.key) + # Column-by-column comparison + print("\n### Column Comparison ###") + for col in existing_records.columns: + if col in more_records.columns: + print(f"\nAnalyzing column: {col}") + print(f"Existing Records '{col}' values:\n{existing_records[col].tolist()}") + print(f"More Records '{col}' values:\n{more_records[col].tolist()}") + print(f"Existing Records '{col}' type: {existing_records[col].dtype}") + print(f"More Records '{col}' type: {more_records[col].dtype}") - # Step 2: Update the cached dictionary with a new key-value pair - print(f'Updating the cached dictionary with key: {self.key}, adding sub_key="sub_key" with value="value".') - self.data.update_cached_dict(cache_name='default_cache', cache_key=self.key, dict_key='sub_key', data='value') + # Check for duplicate rows based on the 'time' column + print("\n### Duplicate Detection ###") + combined = pd.concat([existing_records, more_records], ignore_index=True) + print("Combined Records (before removing duplicates):") + print(combined) - # Step 3: Retrieve the updated cache - print(f'Retrieving the updated dictionary from the cache with key: {self.key}') - cache = self.data.get_cache_item(key=self.key) 
+ # Method 1: Drop duplicates keeping the last occurrence + no_duplicates_last = combined.drop_duplicates(subset='time', keep='last') + print("\nAfter Dropping Duplicates (keep='last'):") + print(no_duplicates_last) - # Step 4: Verify that the 'sub_key' in the cached dictionary has the correct value - print(f'Verifying that "sub_key" in the cached dictionary has the value "value".') - self.assertIsInstance(cache, dict, "The cache should be a dictionary.") - self.assertIn('sub_key', cache, "The 'sub_key' should be present in the cached dictionary.") - self.assertEqual(cache['sub_key'], 'value') - print(' - Update dictionary in cache passed.') + # Method 2: Drop duplicates keeping the first occurrence + no_duplicates_first = combined.drop_duplicates(subset='time', keep='first') + print("\nAfter Dropping Duplicates (keep='first'):") + print(no_duplicates_first) + + # Method 3: Ensure 'time' is in a consistent data type, and drop duplicates + combined['time'] = combined['time'].astype('int64') + consistent_time = combined.drop_duplicates(subset='time', keep='last') + print("\nAfter Dropping Duplicates with 'time' as int64:") + print(consistent_time) + + print("\n### Final Analysis ###") + print("Resulting DataFrame after sorting by 'time':") + final_result = consistent_time.sort_values(by='time').reset_index(drop=True) + print(final_result) + + def test_reproduce_duplicate_issue(self): + # Simulating DataFrames like in your original test + # Time as epoch timestamps + existing_records = pd.DataFrame({ + 'market_id': [1, 1, 1], + 'time': [1723161600000, 1723161900000, 1723162200000], + 'open': [100, 101, 102], + 'high': [110, 111, 112], + 'low': [90, 91, 92], + 'close': [105, 106, 107], + 'volume': [1000, 1001, 1002] + }) + + more_records = pd.DataFrame({ + 'market_id': [1, 1, 1], + 'time': [1723161600000, 1723161900000, 1723162500000], # Overlap at index 0 and 1 + 'open': [100, 101, 100], + 'high': [110, 111, 110], + 'low': [90, 91, 90], + 'close': [105, 106, 105], + 
'volume': [1000, 1001, 1000] + }) + + # Run analysis + self.analyze_cache_update(existing_records, more_records) def _test_get_records_since(self, set_cache=True, set_db=True, query_offset=None, num_rec=None, ex_details=None, simulate_scenarios=None): @@ -614,6 +1145,10 @@ class TestDataCache(unittest.TestCase): print(' - Fetch records within the specified time range passed.') def test_get_records_since(self): + self.connect_exchanges() + self.set_up_database() + self.load_test_data() + print('\nTest get_records_since with records set in data') self._test_get_records_since() @@ -638,8 +1173,6 @@ class TestDataCache(unittest.TestCase): def test_other_timeframes(self): print('\nTest get_records_since with a different timeframe') - if 'candles' not in self.data.caches: - self.data.create_cache(cache_name='candles') ex_details = ['BTC/USD', '15m', 'binance', 'test_guy'] start_datetime = dt.datetime.now(dt.timezone.utc) - dt.timedelta(hours=2) @@ -707,58 +1240,61 @@ class TestDataCache(unittest.TestCase): print(' - Fetch candle data from exchange passed.') def test_remove_row(self): - print('Testing remove_row() method:') + # Step 1: Create the cache and insert data + self.data.create_cache('users', cache_type='table') # Create 'users' cache for this test - # Create a DataFrame to insert as the data - user_data = pd.DataFrame({ - 'user_name': ['test_user'], - 'password': ['test_password'] + # Insert test data into the cache and database + df = pd.DataFrame({ + 'user_name': ['Alice', 'Bob', 'Charlie'], + 'age': [30, 25, 40], + 'users_data': ['data1', 'data2', 'data3'], + 'data': ['info1', 'info2', 'info3'], + 'password': ['pass1', 'pass2', 'pass3'] }) + self.data.insert_df_into_datacache(df=df, cache_name="users", skip_cache=False) - # Insert data into the cache - self.data.set_cache_item( - cache_name='users', - key='user1', - data=user_data - ) + # Scenario 1: Remove a row from both cache and database + filter_vals = [('user_name', 'Bob')] + 
self.data.remove_row_from_datacache(cache_name="users", filter_vals=filter_vals, remove_from_db=True) - # Ensure the data is in the cache - cache_item = self.data.get_cache_item('user1', 'users') - self.assertIsNotNone(cache_item, "Data was not correctly inserted into the cache.") + # Verify the row was removed from the cache + cache = self.data.get_cache('users') + cached_data = cache.get_all_items() + self.assertEqual(len(cached_data), 2) # Only 2 rows should remain + self.assertNotIn('Bob', cached_data['user_name'].values) # Ensure 'Bob' is not in the cache - # The cache_item is a DataFrame, so we access the 'user_name' column directly - self.assertEqual(cache_item['user_name'].iloc[0], 'test_user', "Inserted data is incorrect.") + # Verify the row was removed from the database + with SQLite(db_file=self.db_file) as con: + remaining_users = pd.read_sql_query("SELECT * FROM users", con) + self.assertEqual(len(remaining_users), 2) # Ensure 2 rows remain in the database + self.assertNotIn('Bob', remaining_users['user_name'].values) # Ensure 'Bob' is not in the database - # Remove the row from the cache only (soft delete) - self.data.remove_row(cache_name='users', filter_vals=('user_name', 'test_user'), remove_from_db=False) + # Scenario 2: Remove a row from the cache only (not from the database) + filter_vals = [('user_name', 'Charlie')] + self.data.remove_row_from_datacache(cache_name="users", filter_vals=filter_vals, remove_from_db=False) - # Verify the row has been removed from the cache - cache_item = self.data.get_cache_item('user1', 'users') - self.assertIsNone(cache_item, "Row was not correctly removed from the cache.") + # Verify the row was removed from the cache + cached_data = cache.get_all_items() + self.assertEqual(len(cached_data), 1) # Only 1 row should remain in the cache + self.assertNotIn('Charlie', cached_data['user_name'].values) # Ensure 'Charlie' is not in the cache - # Reinsert the data for hard delete test - self.data.set_cache_item( - 
cache_name='users', - key='user1', - data=user_data - ) + # Verify the row still exists in the database + with SQLite(db_file=self.db_file) as con: + remaining_users = pd.read_sql_query("SELECT * FROM users", con) + self.assertEqual(len(remaining_users), 2) # Ensure Charlie is still in the database + self.assertIn('Charlie', remaining_users['user_name'].values) # Charlie should still exist in the database - # Mock database delete by adding the row to the database - self.data.db.insert_row(table='users', columns=('user_name', 'password'), values=('test_user', 'test_password')) + # Scenario 3: Try removing from a non-existing cache (expecting KeyError) + filter_vals = [('user_name', 'Bob')] + with self.assertRaises(KeyError) as context: + self.data.remove_row_from_datacache(cache_name="non_existing_cache", filter_vals=filter_vals, remove_from_db=True) + self.assertEqual(context.exception.args[0], "Cache: non_existing_cache, does not exist.") - # Remove the row from both cache and database (hard delete) - self.data.remove_row(cache_name='users', filter_vals=('user_name', 'test_user'), remove_from_db=True) - - # Verify the row has been removed from the cache - cache_item = self.data.get_cache_item('user1', 'users') - self.assertIsNone(cache_item, "Row was not correctly removed from the cache.") - - # Verify the row has been removed from the database - with SQLite(self.db_file) as con: - result = pd.read_sql(f'SELECT * FROM users WHERE user_name="test_user"', con) - self.assertTrue(result.empty, "Row was not correctly removed from the database.") - - print(' - Remove row from cache and database passed.') + # Scenario 4: Invalid filter_vals format (expecting ValueError) + invalid_filter_vals = 'invalid_filter' # Not a list of tuples + with self.assertRaises(ValueError) as context: + self.data.remove_row_from_datacache(cache_name="users", filter_vals=invalid_filter_vals, remove_from_db=True) + self.assertEqual(str(context.exception), "filter_vals must be a list of tuples 
(column, value)") def test_timeframe_to_timedelta(self): print('Testing timeframe_to_timedelta() function:') @@ -875,8 +1411,7 @@ class TestDataCache(unittest.TestCase): print(' - All estimate_record_count() tests passed.') def test_get_or_fetch_rows(self): - - # Create a mock table in the cache with multiple entries + # Create mock DataFrames for different users df1 = pd.DataFrame({ 'user_name': ['billy'], 'password': ['1234'], @@ -895,28 +1430,28 @@ class TestDataCache(unittest.TestCase): 'exchanges': [['ex7', 'ex8', 'ex9']] }) - # Insert these DataFrames into the 'users' cache - self.data.create_cache('users', cache_type=InMemoryCache) + # Insert these DataFrames into the 'users' cache with row-based caching + self.data.create_cache('users', cache_type='row') # Assuming 'row' cache type for this test self.data.set_cache_item(key='user_billy', data=df1, cache_name='users') self.data.set_cache_item(key='user_john', data=df2, cache_name='users') self.data.set_cache_item(key='user_alice', data=df3, cache_name='users') print('Testing get_or_fetch_rows() method:') - # Test fetching an existing user from the cache - result = self.data.get_or_fetch_rows('users', ('user_name', 'billy')) + # Fetch user directly by key since this is a row-based cache + result = self.data.get_cache_item(key='user_billy', cache_name='users') self.assertIsInstance(result, pd.DataFrame, "Failed to fetch DataFrame from cache") self.assertFalse(result.empty, "The fetched DataFrame is empty") self.assertEqual(result.iloc[0]['password'], '1234', "Incorrect data fetched from cache") - # Test fetching another user from the cache - result = self.data.get_or_fetch_rows('users', ('user_name', 'john')) + # Fetch another user by key + result = self.data.get_cache_item(key='user_john', cache_name='users') self.assertIsInstance(result, pd.DataFrame, "Failed to fetch DataFrame from cache") self.assertFalse(result.empty, "The fetched DataFrame is empty") self.assertEqual(result.iloc[0]['password'], '5678', 
"Incorrect data fetched from cache") # Test fetching a user that does not exist in the cache - result = self.data.get_or_fetch_rows('users', ('user_name', 'non_existent_user')) + result = self.data.get_cache_item(key='non_existent_user', cache_name='users') # Check if result is None (indicating that no data was found) self.assertIsNone(result, "Expected result to be None for a non-existent user") @@ -925,7 +1460,7 @@ class TestDataCache(unittest.TestCase): def test_is_attr_taken(self): # Create a cache named 'users' - self.data.create_cache('users', cache_type=InMemoryCache) + user_cache = self.data.create_cache('users', cache_type='table') # Create mock data for three users user_data_1 = pd.DataFrame({ @@ -945,57 +1480,69 @@ class TestDataCache(unittest.TestCase): }) # Insert mock data into the cache - self.data.set_cache_item('user1', user_data_1, cache_name='users') - self.data.set_cache_item('user2', user_data_2, cache_name='users') - self.data.set_cache_item('user3', user_data_3, cache_name='users') + self.data.set_cache_item(cache_name='users', data=user_data_1) + self.data.set_cache_item(cache_name='users', data=user_data_2) + self.data.set_cache_item(cache_name='users', data=user_data_3) # Test when attribute value is taken - result_taken = self.data.is_attr_taken(cache_name='users', attr='user_name', val='billy') + result_taken = user_cache.is_attr_taken('user_name', 'billy') self.assertTrue(result_taken, "Expected 'billy' to be taken, but it was not.") # Test when attribute value is not taken - result_not_taken = self.data.is_attr_taken(cache_name='users', attr='user_name', val='charlie') + result_not_taken = user_cache.is_attr_taken('user_name', 'charlie') self.assertFalse(result_not_taken, "Expected 'charlie' not to be taken, but it was.") - def test_insert_df(self): - print('Testing insert_df() method:') + def test_insert_df_row_based_cache(self): + self._test_insert_df(cache_type='row') - # Create a DataFrame to insert + def 
test_insert_df_table_based_cache(self): + self._test_insert_df(cache_type='table') + + def _test_insert_df(self, cache_type): + self.data.create_cache('users', cache_type=cache_type) # Create 'users' cache for this test + + # Arrange: Create a simple DataFrame to insert df = pd.DataFrame({ - 'user_name': ['Alice'], - 'age': [30], - 'users_data': ['user_data_1'], - 'data': ['additional_data'], - 'password': ['1234'] + 'user_name': ['Alice', 'Bob'], + 'age': [30, 25], + 'users_data': ['data1', 'data2'], + 'data': ['info1', 'info2'], + 'password': ['pass1', 'pass2'] }) - # Insert data into the database and cache - self.data.insert_df(df=df, cache_name='users') + # Ensure the users table exists in the database and clear any existing data + with SQLite(db_file=self.db_file) as con: + con.execute("DELETE FROM users;") # Clear existing data for clean testing - # Assume the database will return an auto-incremented ID starting at 1 - auto_incremented_id = 1 + # Act: Insert the DataFrame into the 'users' table without skipping the cache + self.data.insert_df_into_datacache(df=df, cache_name="users", skip_cache=False) - # Verify that the data was added to the cache using the auto-incremented ID as the key - cached_df = self.data.get_cache_item(key=str(auto_incremented_id), cache_name='users') + # Assert: Verify the data was correctly inserted into the database + with SQLite(db_file=self.db_file) as con: + inserted_users = pd.read_sql_query("SELECT * FROM users", con) - # Check that the DataFrame in the cache matches the original DataFrame - pd.testing.assert_frame_equal(cached_df, df, check_dtype=False) + self.assertEqual(len(inserted_users), 2) # Ensure both rows are inserted + self.assertEqual(inserted_users.iloc[0]['user_name'], 'Alice') # Verify first row data + self.assertEqual(inserted_users.iloc[1]['user_name'], 'Bob') # Verify second row data - # Now, let's verify the data was inserted into the database - with SQLite(self.data.db.db_file) as conn: - # Query the users 
table for the inserted data - query_result = pd.read_sql_query(f"SELECT * FROM users WHERE id = {auto_incremented_id}", conn) + # Verify cache behavior (RowBasedCache) + cache = self.data.get_cache('users') + if isinstance(cache, RowBasedCache): + # Check if each row is added to the cache (in row-based cache) + cached_user1 = cache.get_entry('Alice') + cached_user2 = cache.get_entry('Bob') - # Verify the database content matches the inserted DataFrame - expected_db_df = df.copy() - expected_db_df['id'] = auto_incremented_id # Add the auto-incremented ID to the expected DataFrame - # Align column order - expected_db_df = expected_db_df[['id', 'user_name', 'age', 'users_data', 'data', 'password']] + self.assertIsNotNone(cached_user1) # Ensure user 'Alice' is cached + self.assertIsNotNone(cached_user2) # Ensure user 'Bob' is cached + self.assertEqual(cached_user1.iloc[0]['user_name'], 'Alice') # Verify cache content for Alice + self.assertEqual(cached_user2.iloc[0]['user_name'], 'Bob') # Verify cache content for Bob - # Check that the database DataFrame matches the expected DataFrame - pd.testing.assert_frame_equal(query_result, expected_db_df, check_dtype=False) - - print(' - Data insertion into cache and database verified successfully.') + elif isinstance(cache, TableBasedCache): + # For TableBasedCache, check if the entire DataFrame is cached + cached_data = cache.get_all_items() + self.assertEqual(len(cached_data), 2) # Ensure both rows are cached + self.assertEqual(cached_data.iloc[0]['user_name'], 'Alice') + self.assertEqual(cached_data.iloc[1]['user_name'], 'Bob') def test_insert_row(self): print("Testing insert_row() method:") @@ -1005,14 +1552,15 @@ class TestDataCache(unittest.TestCase): columns = ('user_name', 'age') values = ('Alice', 30) - # Create the cache first - self.data.create_cache(cache_name, cache_type=InMemoryCache) + # Create the cache with a row-based cache type (if that's how your system works now) + user_cache = 
self.data.create_cache(cache_name, cache_type='row') # Insert a row into the cache and database without skipping the cache - self.data.insert_row(cache_name=cache_name, columns=columns, values=values, skip_cache=False) + # Ensure 'key' is passed correctly, if needed (depending on how `insert_row` works now) + self.data.insert_row_into_datacache(cache_name=cache_name, columns=columns, values=values, key='1', skip_cache=False) - # Retrieve the inserted item from the cache - result = self.data.get_cache_item(key='1', cache_name=cache_name) + # Retrieve the inserted item from the cache using the correct method + result = user_cache.get_entry(key='1') # Assert that the data in the cache matches what was inserted self.assertIsNotNone(result, "No data found in the cache for the inserted ID.") @@ -1024,10 +1572,10 @@ class TestDataCache(unittest.TestCase): print("Testing insert_row() with skip_cache=True") # Insert another row into the database, this time skipping the cache - self.data.insert_row(cache_name=cache_name, columns=columns, values=('Bob', 40), skip_cache=True) + self.data.insert_row_into_datacache(cache_name=cache_name, columns=columns, values=('Bob', 40), key='2', skip_cache=True) # Attempt to retrieve the newly inserted row from the cache - result_after_skip = self.data.get_cache_item(key='2', cache_name=cache_name) + result_after_skip = user_cache.get_entry(key='2') # Assert that no data is found in the cache for the new row self.assertIsNone(result_after_skip, "Data should not have been cached when skip_cache=True.") @@ -1052,48 +1600,62 @@ class TestDataCache(unittest.TestCase): print(' - _fill_data_holes passed.') def test_get_cache_item(self): - # Case 1: Retrieve a stored Indicator instance (serialized) + self.load_prerequisites() + # Case 1: Retrieve a stored Indicator instance (serialized and deserialized) indicator = Indicator(name='SMA', indicator_type='SMA', properties={'period': 5}) - self.data.set_cache_item('indicator_key', indicator, 
cache_name='indicators') + + # Create a row-based cache for indicators and store serialized Indicator data + self.data.create_cache('indicators', cache_type='row') + self.data.set_cache_item(key='indicator_key', data=indicator, cache_name='indicators') + + # Retrieve the indicator and check for deserialization stored_data = self.data.get_cache_item('indicator_key', cache_name='indicators') self.assertIsInstance(stored_data, Indicator, "Failed to retrieve and deserialize the Indicator instance") # Case 2: Retrieve non-Indicator data (e.g., dict) - data = {'key': 'value'} - self.data.set_cache_item('non_indicator_key', data) - stored_data = self.data.get_cache_item('non_indicator_key') - self.assertEqual(stored_data, data, "Failed to retrieve non-Indicator data correctly") + data_dict = {'key': 'value'} - # Case 3: Retrieve expired cache item (should return None) - self.data.set_cache_item('expiring_key', 'test_data', expire_delta=dt.timedelta(seconds=1)) - time.sleep(2) # Wait for the cache to expire - self.assertIsNone(self.data.get_cache_item('expiring_key'), "Expired cache item should return None") + # Create a cache for generic data (row-based) + self.data.create_cache('default_cache', cache_type='row') - # Case 4: Retrieve non-existent key (should return None) - self.assertIsNone(self.data.get_cache_item('non_existent_key'), "Non-existent key should return None") + # Store a dictionary + self.data.set_cache_item(key='dict_key', data=data_dict, cache_name='default_cache') - # Case 5: Retrieve with invalid key type (should raise ValueError) - with self.assertRaises(ValueError): - self.data.get_cache_item(12345) # Invalid key type + # Retrieve and check if the data matches the original dict + stored_data = self.data.get_cache_item('dict_key', cache_name='default_cache') + self.assertEqual(stored_data, data_dict, "Failed to retrieve non-Indicator data correctly") - # Case 6: Test Deserialization Failure - # Simulate corrupted serialized data - corrupted_data = 
b'\x80\x03corrupted_data' - self.data.set_cache_item('corrupted_key', corrupted_data, cache_name='indicators') - with self.assertLogs(level='ERROR') as log: - self.assertIsNone(self.data.get_cache_item('corrupted_key', cache_name='indicators')) - self.assertIn("Deserialization failed", log.output[0]) + # Case 3: Retrieve a list stored in the cache + data_list = [1, 2, 3, 4, 5] - # Case 7: Test Cache Eviction - # Create a cache with a limit of 2 items - self.data.set_cache_item('key1', 'data1', cache_name='test_cache', limit=2) - self.data.set_cache_item('key2', 'data2', cache_name='test_cache', limit=2) - self.data.set_cache_item('key3', 'data3', cache_name='test_cache', limit=2) + # Store a list in row-based cache + self.data.set_cache_item(key='list_key', data=data_list, cache_name='default_cache') - # Verify that the oldest item (key1) has been evicted - self.assertIsNone(self.data.get_cache_item('key1', cache_name='test_cache')) - self.assertEqual(self.data.get_cache_item('key2', cache_name='test_cache'), 'data2') - self.assertEqual(self.data.get_cache_item('key3', cache_name='test_cache'), 'data3') + # Retrieve and check if the data matches the original list + stored_data = self.data.get_cache_item('list_key', cache_name='default_cache') + self.assertEqual(stored_data, data_list, "Failed to retrieve list data correctly") + + # Case 4: Retrieve a DataFrame stored in the cache (Table-Based Cache) + data_df = pd.DataFrame({ + 'column1': [10, 20, 30], + 'column2': ['A', 'B', 'C'] + }) + + # Create a table-based cache + self.data.create_cache('table_cache', cache_type='table') + + # Store a DataFrame in table-based cache + self.data.set_cache_item(key='testkey', data=data_df, cache_name='table_cache') + + # Retrieve and check if the DataFrame matches the original + stored_data = self.data.get_cache_item(key='testkey', cache_name='table_cache') + pd.testing.assert_frame_equal(stored_data, data_df) + + # Case 5: Attempt to retrieve a non-existent key + non_existent = 
self.data.get_cache_item('non_existent_key', cache_name='default_cache') + self.assertIsNone(non_existent, "Expected None for non-existent cache key") + + print(" - All get_cache_item tests passed.") def test_set_user_indicator_properties(self): # Case 1: Store user-specific display properties @@ -1166,26 +1728,83 @@ class TestDataCache(unittest.TestCase): exchange_name) # Invalid user_id type def test_set_cache_item(self): - # Case 1: Store and retrieve an Indicator instance (serialized) + data_cache = self.data + + # ------------------------- + # Row-Based Cache Test Cases + # ------------------------- + # Case 1: Store and retrieve an item in a RowBasedCache with a key + data_cache.create_cache('row_cache', cache_type='row') # Create row-based cache + key = 'row_key' + data = {'some': 'data'} + + data_cache.set_cache_item(cache_name='row_cache', data=data, key=key) + cached_item = data_cache.get_cache_item(key, cache_name='row_cache') + self.assertEqual(cached_item, data, "Failed to store and retrieve data in RowBasedCache") + + # Case 2: Store and retrieve an Indicator instance (serialization) indicator = Indicator(name='SMA', indicator_type='SMA', properties={'period': 5}) - self.data.set_cache_item('indicator_key', indicator, cache_name='indicators') - stored_data = self.data.get_cache_item('indicator_key', cache_name='indicators') - self.assertIsInstance(stored_data, Indicator, "Failed to deserialize the Indicator instance") + data_cache.set_cache_item(cache_name='row_cache', data=indicator, key='indicator_key') + cached_indicator = data_cache.get_cache_item('indicator_key', cache_name='row_cache') - # Case 2: Store and retrieve non-Indicator data (e.g., dict) - data = {'key': 'value'} - self.data.set_cache_item('non_indicator_key', data) - stored_data = self.data.get_cache_item('non_indicator_key') - self.assertEqual(stored_data, data, "Non-Indicator data was modified or not stored correctly") + # Assert that the data was correctly serialized and 
deserialized + self.assertIsInstance(pickle.loads(cached_indicator), Indicator, "Failed to deserialize Indicator instance") - # Case 3: Handle invalid key type (non-string) - with self.assertRaises(ValueError): - self.data.set_cache_item(12345, 'test_data') # Invalid key type + # Case 3: Prevent overwriting an existing key if do_not_overwrite=True + new_data = {'new': 'data'} + data_cache.set_cache_item(cache_name='row_cache', data=new_data, key=key, do_not_overwrite=True) + cached_item_after = data_cache.get_cache_item(key, cache_name='row_cache') + self.assertEqual(cached_item_after, data, "Overwriting occurred when it should have been prevented") - # Case 4: Cache item expiration (item should expire after set time) - self.data.set_cache_item('expiring_key', 'test_data', expire_delta=dt.timedelta(seconds=1)) - time.sleep(2) # Wait for expiration time - self.assertIsNone(self.data.get_cache_item('expiring_key'), "Cached item did not expire as expected") + # Case 4: Raise ValueError if key is None in RowBasedCache + with self.assertRaises(ValueError, msg="RowBasedCache requires a key to store the data."): + data_cache.set_cache_item(cache_name='row_cache', data=data, key=None) + + # ------------------------- + # Table-Based Cache Test Cases + # ------------------------- + # Case 5: Store and retrieve a DataFrame in a TableBasedCache + data_cache.create_cache('table_cache', cache_type='table') # Create table-based cache + df = pd.DataFrame({'col1': [1, 2], 'col2': ['A', 'B']}) + + data_cache.set_cache_item(cache_name='table_cache', data=df, key='table_key') + cached_df = data_cache.get_cache_item('table_key', cache_name='table_cache') + pd.testing.assert_frame_equal(cached_df, df, "Failed to store and retrieve DataFrame in TableBasedCache") + + # Case 6: Prevent overwriting an existing key if do_not_overwrite=True in TableBasedCache + new_df = pd.DataFrame({'col1': [3, 4], 'col2': ['C', 'D']}) + data_cache.set_cache_item(cache_name='table_cache', data=new_df, 
key='table_key', do_not_overwrite=True) + cached_df_after = data_cache.get_cache_item('table_key', cache_name='table_cache') + pd.testing.assert_frame_equal(cached_df_after, df, "Overwriting occurred when it should have been prevented") + + # Case 7: Raise ValueError if non-DataFrame data is provided in TableBasedCache + with self.assertRaises(ValueError, msg="TableBasedCache can only store DataFrames."): + data_cache.set_cache_item(cache_name='table_cache', data={'not': 'a dataframe'}, key='table_key') + + # ------------------------- + # Expiration Handling Test Case + # ------------------------- + # Case 8: Store an item with an expiration time (RowBasedCache) + key = 'expiring_key' + data = {'some': 'data'} + expire_delta = dt.timedelta(seconds=5) + + data_cache.set_cache_item(cache_name='row_cache', data=data, key=key, expire_delta=expire_delta) + cached_item = data_cache.get_cache_item(key, cache_name='row_cache') + self.assertEqual(cached_item, data, "Failed to store and retrieve data with expiration") + + # Wait for expiration to occur (ensure data is removed after expiration) + import time + time.sleep(6) + expired_item = data_cache.get_cache_item(key, cache_name='row_cache') + self.assertIsNone(expired_item, "Data was not removed after expiration time") + + # ------------------------- + # Invalid Cache Type Test Case + # ------------------------- + # Case 9: Raise KeyError if an uncreated/unsupported cache name is provided + with self.assertRaises(KeyError, msg="Unsupported cache type for 'unsupported_cache'"): + data_cache.set_cache_item(cache_name='unsupported_cache', data={'some': 'data'}, key='some_key') def test_calculate_and_cache_indicator(self): # Testing the calculation and caching of an indicator through DataCache (which includes IndicatorCache @@ -1323,7 +1942,7 @@ class TestDataCache(unittest.TestCase): cache_key = self.data._make_indicator_key('BTC/USD', '5m', 'binance', 'SMA', properties['period']) # Store the cached data as DataFrame (no need for
to_dict('records')) - self.data.set_cache_item(cache_key, cached_data, cache_name='indicator_data') + self.data.set_cache_item(cache_name='indicator_data', data=cached_data, key=cache_key) # Print cached data to inspect its range print("Cached data time range:")