Spaces:

ParthSadaria
/

lokiai

Running

App Files Files Community

ParthSadaria committed 6 days ago

Commit

d5a05cc

verified ·

1 Parent(s): 925b0de

Update usage_tracker.py

Browse files

Files changed (1) hide show

usage_tracker.py +161 -35

usage_tracker.py CHANGED Viewed

@@ -2,7 +2,7 @@ import json
 import os
 import datetime
 import threading
-from collections import Counter, defaultdict
 from typing import List, Dict, Any, Optional
 from fastapi import Request
@@ -14,56 +14,121 @@ class UsageTracker:
         self._schedule_save()
     def _load_data(self) -> Dict[str, Any]:
-        """
-        Read usage data from ``self.data_file``.
-
-        Returns the parsed JSON unchanged when it is a dict containing a
-        'requests' key, the result of ``_convert_old_format`` when its
-        'total_requests' value is an int, and ``_initialize_empty_data()``
-        when the file is missing, cannot be decoded, or matches neither shape.
-        """
         if os.path.exists(self.data_file):
             try:
                 with open(self.data_file, 'r') as f:
                     data = json.load(f)
-                    if isinstance(data, dict) and 'requests' in data:
                         return data
-                    # If data is old format, try to convert it
-                    # NOTE(review): data.get() assumes a dict; a JSON list or
-                    # scalar would raise AttributeError, which is not caught below.
-                    if isinstance(data.get('total_requests'), int):
                         return self._convert_old_format(data)
-            except (json.JSONDecodeError, TypeError):
-                print(f"Warning: Could not decode JSON from {self.data_file}. Starting fresh.")
         return self._initialize_empty_data()
-    def _initialize_empty_data(self) -> Dict[str, List]:
-        """Return a fresh usage structure: a dict whose only key, 'requests', maps to an empty list."""
-        return {'requests': []}
-    def _convert_old_format(self, old_data: Dict[str, Any]) -> Dict[str, List]:
-        """
-        Replace old-format data with a fresh, empty structure.
-
-        ``old_data`` is not read: nothing from the old file is carried over.
-        """
         print("Converting old usage data format to new format.")
         new_data = self._initialize_empty_data()
-        # No conversion is attempted here; the old contents are dropped and
-        # tracking restarts from an empty structure to avoid complexity.
         return new_data
     def save_data(self):
         """
         Write ``self.data`` to ``self.data_file`` as indented JSON.

         Performs one save per call while holding ``self.lock``; the periodic
         behaviour comes from ``_schedule_save``. An IOError is printed rather
         than raised.
         """
         with self.lock:
             try:
                 with open(self.data_file, 'w') as f:
-                    json.dump(self.data, f, indent=4)
             except IOError as e:
                 print(f"Error saving usage data to {self.data_file}: {e}")
     def _schedule_save(self):
         """Save now and arm a 60-second timer that calls this method again."""
         # The next run is scheduled before saving, so every call starts a new
         # threading.Timer and then performs one save.
         threading.Timer(60.0, self._schedule_save).start()
         self.save_data()
-    def record_request(self, request: Optional[Request], model: str = "unknown", endpoint: str = "unknown"):
-        """
-        Append one request entry to ``self.data['requests']`` under ``self.lock``.
-
-        The timestamp is the current UTC time in ISO 8601 form; the IP address
-        and user agent stay "N/A" when ``request`` is falsy.
-        """
         with self.lock:
             now = datetime.datetime.now(datetime.timezone.utc)
-            ip_address = "N/A"
-            user_agent = "N/A"
-            if request:
-                # NOTE(review): request.client is dereferenced without a None check.
-                ip_address = request.client.host
-                user_agent = request.headers.get("user-agent", "N/A")
             self.data['requests'].append({
                 'timestamp': now.isoformat(),
                 'model': model,
@@ -72,45 +137,106 @@ class UsageTracker:
                 'user_agent': user_agent,
             })
     def get_usage_summary(self, days: int = 7) -> Dict[str, Any]:
-        """
-        Build a usage summary from ``self.data['requests']``.
-
-        'total_requests' and 'unique_ip_count' cover every stored request;
-        'model_usage', 'endpoint_usage', 'daily_usage' and 'recent_requests'
-        only cover requests whose timestamp falls within the last ``days`` days.
-        """
         with self.lock:
             summary = {
                 'total_requests': 0,
-                'model_usage': defaultdict(int),
-                'endpoint_usage': defaultdict(int),
-                'daily_usage': defaultdict(lambda: defaultdict(int)),
-                'unique_ips': set(),
                 'recent_requests': []
             }
             cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)
-            # Walk the list from its end so the last-stored entries are collected first
             for req in reversed(self.data['requests']):
                 req_time = datetime.datetime.fromisoformat(req['timestamp'])
-                # Counted for every stored request, regardless of the cutoff
                 summary['total_requests'] += 1
-                summary['unique_ips'].add(req['ip_address'])
                 if req_time >= cutoff_date:
                     date_str = req_time.strftime("%Y-%m-%d")
-                    # Per-model, per-endpoint and per-day counts inside the window
                     summary['model_usage'][req['model']] += 1
                     summary['endpoint_usage'][req['endpoint']] += 1
                     summary['daily_usage'][date_str]['requests'] += 1
-                    # Keep at most 20 entries in the recent requests list
                     if len(summary['recent_requests']) < 20:
                          summary['recent_requests'].append(req)
             # Sort daily usage by date
             summary['daily_usage'] = dict(sorted(summary['daily_usage'].items()))
-            summary['unique_ip_count'] = len(summary['unique_ips'])
-            del summary['unique_ips'] # Only the count is returned, not the set
             return summary

 import os
 import datetime
 import threading
+from collections import defaultdict
 from typing import List, Dict, Any, Optional
 from fastapi import Request
         self._schedule_save()
     def _load_data(self) -> Dict[str, Any]:
+        """
+        Loads usage data from the JSON file, ensuring data integrity.
+        Handles cases where the file might be corrupted or in an old format.
+
+        Returns the parsed JSON as-is when it is a dict with 'requests',
+        'models' and 'api_endpoints' keys, the converted data when it is a
+        dict with a 'total_requests' key, and a fresh empty structure when the
+        file is missing, cannot be decoded, or matches neither shape.
+        """
         if os.path.exists(self.data_file):
             try:
                 with open(self.data_file, 'r') as f:
                     data = json.load(f)
+                    # Check if data is in the expected new format
+                    # NOTE(review): json.load yields plain dicts, whereas
+                    # _initialize_empty_data builds defaultdicts for 'models'
+                    # and 'api_endpoints'. Indexing the loaded data with a key
+                    # it does not contain yet (as record_request does) would
+                    # raise KeyError -- confirm and wrap if needed.
+                    if isinstance(data, dict) and 'requests' in data and 'models' in data and 'api_endpoints' in data:
                         return data
+                    # If data is in an older, simpler format, convert it
+                    elif isinstance(data, dict) and 'total_requests' in data: # Heuristic for old format
                         return self._convert_old_format(data)
+            # Only decode/type errors are handled; an error raised by open() is not caught here.
+            except (json.JSONDecodeError, TypeError) as e:
+                print(f"Warning: Could not decode JSON from {self.data_file} ({e}). Starting fresh.")
         return self._initialize_empty_data()
+    def _initialize_empty_data(self) -> Dict[str, Any]:
+        """
+        Initializes a new, empty data structure for usage tracking.
+        This structure includes a list for all requests, and dictionaries
+        to store aggregated data for models and API endpoints.
+
+        'models' and 'api_endpoints' are defaultdicts: the first access to a
+        missing key creates a stats dict with 'total_requests' set to 0 and
+        'first_used' / 'last_used' set to None.
+        """
+        return {
+            'requests': [],
+            'models': defaultdict(lambda: {'total_requests': 0, 'first_used': None, 'last_used': None}),
+            'api_endpoints': defaultdict(lambda: {'total_requests': 0, 'first_used': None, 'last_used': None})
+        }
+    def _convert_old_format(self, old_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Converts data from the old format to the new detailed format.
+        This is a crucial step to avoid data loss on updates.
+        It iterates through old 'requests' (if any) and re-records them
+        into the new structured format.
+
+        Only the 'requests' list of ``old_data`` is read; when it is absent
+        or not a list, the returned structure is empty. Missing fields get
+        placeholder values, and a missing or malformed timestamp is replaced
+        by the current UTC time.
+        """
         print("Converting old usage data format to new format.")
         new_data = self._initialize_empty_data()
+        # Preserve existing requests if they follow a basic structure
+        if 'requests' in old_data and isinstance(old_data['requests'], list):
+            for req in old_data['requests']:
+                # Attempt to extract relevant fields from old request entry
+                # NOTE(review): req.get() assumes every entry is a dict; any
+                # other entry type would raise AttributeError here.
+                timestamp_str = req.get('timestamp')
+                model_name = req.get('model', 'unknown_model')
+                endpoint_name = req.get('endpoint', 'unknown_endpoint')
+                ip_address = req.get('ip_address', 'N/A')
+                user_agent = req.get('user_agent', 'N/A')
+                # Ensure timestamp is valid and parseable
+                # NOTE(review): only ValueError is handled; a truthy non-string
+                # timestamp would make fromisoformat raise TypeError instead.
+                try:
+                    timestamp = datetime.datetime.fromisoformat(timestamp_str) if timestamp_str else datetime.datetime.now(datetime.timezone.utc)
+                except ValueError:
+                    timestamp = datetime.datetime.now(datetime.timezone.utc) # Fallback if timestamp is malformed
+                new_data['requests'].append({
+                    'timestamp': timestamp.isoformat(),
+                    'model': model_name,
+                    'endpoint': endpoint_name,
+                    'ip_address': ip_address,
+                    'user_agent': user_agent,
+                })
+                # Update aggregated stats for models and endpoints
+                # This ensures that even old data contributes to the new summary
+                # NOTE(review): the fallback timestamp is timezone-aware; if an
+                # old entry parses to a naive datetime, the < / > comparisons
+                # below would mix naive and aware values and raise TypeError
+                # -- confirm that old timestamps always carry an offset.
+                if not new_data['models'][model_name]['first_used'] or timestamp < datetime.datetime.fromisoformat(new_data['models'][model_name]['first_used']):
+                    new_data['models'][model_name]['first_used'] = timestamp.isoformat()
+                if not new_data['models'][model_name]['last_used'] or timestamp > datetime.datetime.fromisoformat(new_data['models'][model_name]['last_used']):
+                    new_data['models'][model_name]['last_used'] = timestamp.isoformat()
+                new_data['models'][model_name]['total_requests'] += 1
+                if not new_data['api_endpoints'][endpoint_name]['first_used'] or timestamp < datetime.datetime.fromisoformat(new_data['api_endpoints'][endpoint_name]['first_used']):
+                    new_data['api_endpoints'][endpoint_name]['first_used'] = timestamp.isoformat()
+                if not new_data['api_endpoints'][endpoint_name]['last_used'] or timestamp > datetime.datetime.fromisoformat(new_data['api_endpoints'][endpoint_name]['last_used']):
+                    new_data['api_endpoints'][endpoint_name]['last_used'] = timestamp.isoformat()
+                new_data['api_endpoints'][endpoint_name]['total_requests'] += 1
+        print("Data conversion complete.")
         return new_data
     def save_data(self):
         """
         Write the current usage data to ``self.data_file`` as indented JSON.

         Performs one save per call while holding ``self.lock``; the periodic
         behaviour comes from ``_schedule_save``. An IOError is printed rather
         than raised.
         """
         with self.lock:
             try:
+                # Convert defaultdicts to regular dicts for JSON serialization
+                # (top level only: the per-key stats dicts are passed through unchanged)
+                serializable_data = {
+                    'requests': self.data['requests'],
+                    'models': dict(self.data['models']),
+                    'api_endpoints': dict(self.data['api_endpoints'])
+                }
                 with open(self.data_file, 'w') as f:
+                    json.dump(serializable_data, f, indent=4)
             # Only IOError is handled; any other exception from json.dump propagates.
             except IOError as e:
                 print(f"Error saving usage data to {self.data_file}: {e}")
     def _schedule_save(self):
         """Save now and arm a 60-second timer that calls this method again."""
+        # A new threading.Timer is started on every call, before the save runs.
+        # No reference to the timer is kept, so it cannot be cancelled from here.
+        # NOTE(review): the timer's daemon flag is not set explicitly; if it ends
+        # up non-daemon, a pending timer keeps the process alive -- confirm that
+        # this is the intended shutdown behaviour.
         threading.Timer(60.0, self._schedule_save).start()
         self.save_data()
+    def record_request(self, request: Optional[Request] = None, model: str = "unknown", endpoint: str = "unknown"):
+        """
+        Records a single API request with detailed information.
+        Updates both the raw request list and aggregated statistics.
+
+        Runs under ``self.lock``. The timestamp is the current UTC time in
+        ISO 8601 form; the IP address is "N/A" when ``request`` or
+        ``request.client`` is falsy, and the user agent is "N/A" when
+        ``request`` is falsy or carries no "user-agent" header.
+        """
         with self.lock:
             now = datetime.datetime.now(datetime.timezone.utc)
+            ip_address = request.client.host if request and request.client else "N/A"
+            user_agent = request.headers.get("user-agent", "N/A") if request else "N/A"
+            # Append to raw requests list
+            # NOTE(review): as shown here the entry has no 'endpoint' or
+            # 'ip_address' key, although get_usage_summary reads both;
+            # presumably those lines are hidden by the diff view -- confirm
+            # against the full file.
             self.data['requests'].append({
                 'timestamp': now.isoformat(),
                 'model': model,
                 'user_agent': user_agent,
             })
+            # Update model specific stats
+            # NOTE(review): this lookup relies on self.data['models'] creating
+            # missing keys (a defaultdict); verify that is still true for data
+            # loaded from the JSON file.
+            model_stats = self.data['models'][model]
+            model_stats['total_requests'] += 1
+            if model_stats['first_used'] is None or now < datetime.datetime.fromisoformat(model_stats['first_used']):
+                model_stats['first_used'] = now.isoformat()
+            if model_stats['last_used'] is None or now > datetime.datetime.fromisoformat(model_stats['last_used']):
+                model_stats['last_used'] = now.isoformat()
+            # Update endpoint specific stats (same caveat as for the model lookup)
+            endpoint_stats = self.data['api_endpoints'][endpoint]
+            endpoint_stats['total_requests'] += 1
+            if endpoint_stats['first_used'] is None or now < datetime.datetime.fromisoformat(endpoint_stats['first_used']):
+                endpoint_stats['first_used'] = now.isoformat()
+            if endpoint_stats['last_used'] is None or now > datetime.datetime.fromisoformat(endpoint_stats['last_used']):
+                endpoint_stats['last_used'] = now.isoformat()
     def get_usage_summary(self, days: int = 7) -> Dict[str, Any]:
+        """
+        Generates a comprehensive summary of usage data for the specified number of days.
+
+        The returned dict has these keys:
+          - 'total_requests', 'unique_ips_total_count': over every stored request
+          - 'all_time_model_usage', 'all_time_endpoint_usage': copies of the
+            aggregated per-model / per-endpoint stats
+          - 'model_usage_period', 'endpoint_usage_period', 'daily_usage_period',
+            'recent_requests': only requests within the last ``days`` days
+        The intermediate 'model_usage', 'endpoint_usage' and 'daily_usage' keys
+        are removed before returning.
+        """
         with self.lock:
             summary = {
                 'total_requests': 0,
+                'model_usage': defaultdict(int),      # Requests per model for the period
+                'endpoint_usage': defaultdict(int),    # Requests per endpoint for the period
+                'daily_usage': defaultdict(lambda: {'requests': 0, 'unique_ips': set()}), # Daily stats
+                'unique_ips_total': set(),             # Unique IPs across all requests
                 'recent_requests': []
             }
+            # Prepare data for model and API endpoint charts
+            # These are based on the aggregated 'self.data' which covers all time,
+            # but the summary 'model_usage' and 'endpoint_usage' below are for the given 'days' period.
+            all_time_model_data = {
+                model: {
+                    'total_requests': stats['total_requests'],
+                    'first_used': stats['first_used'],
+                    'last_used': stats['last_used']
+                } for model, stats in self.data['models'].items()
+            }
+            all_time_endpoint_data = {
+                endpoint: {
+                    'total_requests': stats['total_requests'],
+                    'first_used': stats['first_used'],
+                    'last_used': stats['last_used']
+                } for endpoint, stats in self.data['api_endpoints'].items()
+            }
             cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)
+            # Iterate backwards for recent requests and aggregate data for the specified period
+            # NOTE(review): requests_for_period is filled below but never read in this method.
+            requests_for_period = []
             for req in reversed(self.data['requests']):
                 req_time = datetime.datetime.fromisoformat(req['timestamp'])
+                # Always update total requests and unique IPs for all time
                 summary['total_requests'] += 1
+                summary['unique_ips_total'].add(req['ip_address'])
+                # NOTE(review): cutoff_date is timezone-aware, so a stored timestamp
+                # without a UTC offset would make this comparison raise TypeError
+                # -- confirm every stored timestamp carries an offset.
                 if req_time >= cutoff_date:
+                    requests_for_period.append(req)
                     date_str = req_time.strftime("%Y-%m-%d")
+                    # Aggregate data for charts and tables for the given period
                     summary['model_usage'][req['model']] += 1
                     summary['endpoint_usage'][req['endpoint']] += 1
                     summary['daily_usage'][date_str]['requests'] += 1
+                    summary['daily_usage'][date_str]['unique_ips'].add(req['ip_address'])
+                    # Add to recent requests list (up to 20)
                     if len(summary['recent_requests']) < 20:
                          summary['recent_requests'].append(req)
+            # Convert daily unique IPs set to count
+            for date_str, daily_stats in summary['daily_usage'].items():
+                daily_stats['unique_ips_count'] = len(daily_stats['unique_ips'])
+                del daily_stats['unique_ips'] # Remove the set before returning
             # Sort daily usage by date
             summary['daily_usage'] = dict(sorted(summary['daily_usage'].items()))
+            # Convert defaultdicts to regular dicts for final summary
+            summary['model_usage_period'] = dict(summary['model_usage'])
+            summary['endpoint_usage_period'] = dict(summary['endpoint_usage'])
+            summary['daily_usage_period'] = dict(summary['daily_usage'])
+            # Add all-time data
+            summary['all_time_model_usage'] = all_time_model_data
+            summary['all_time_endpoint_usage'] = all_time_endpoint_data
+            summary['unique_ips_total_count'] = len(summary['unique_ips_total'])
+            del summary['unique_ips_total'] # No need to send the whole set
+            # Clean up defaultdicts that are not needed in the final output structure
+            del summary['model_usage']
+            del summary['endpoint_usage']
+            del summary['daily_usage']
             return summary