Spaces:

dtrovato997
/

SpeechAnalysisDemo

Paused

App Files Files Community

dtrovato997 committed on May 28

Commit

825d7f4

1 Parent(s): 87728c7

improved logging and persistent storage on HF hub

Browse files

Files changed (4) hide show

Dockerfile +6 -2
main.py +149 -13
models/age_and_gender_model.py +20 -5
models/nationality_model.py +24 -6

Dockerfile CHANGED Viewed

@@ -32,8 +32,12 @@ WORKDIR $HOME/app
 # Copy application code with proper ownership
 COPY --chown=user . $HOME/app
-# Create directories with proper permissions
-RUN mkdir -p $HOME/app/uploads $HOME/app/cache
 # Expose port 7860 (HF Spaces default)
 EXPOSE 7860

 # Copy application code with proper ownership
 COPY --chown=user . $HOME/app
+# Create uploads directory in app folder (for temporary files)
+RUN mkdir -p $HOME/app/uploads
+# Create the local cache directory used as the fallback location for model files
+# When persistent storage is mounted at /data, the model classes detect it at runtime and use it instead
+RUN mkdir -p $HOME/app/cache
 # Expose port 7860 (HF Spaces default)
 EXPOSE 7860

main.py CHANGED Viewed

@@ -5,12 +5,16 @@ import numpy as np
 import librosa
 from typing import Dict, Any
 import logging
 from contextlib import asynccontextmanager
 from models.nationality_model import NationalityModel
 from models.age_and_gender_model import AgeGenderModel
-# Configure logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 UPLOAD_FOLDER = 'uploads'
@@ -31,25 +35,36 @@ async def load_models() -> bool:
     global age_gender_model, nationality_model
     try:
         # Load age & gender model
-        logger.info("Loading age & gender model...")
         age_gender_model = AgeGenderModel()
         age_gender_success = age_gender_model.load()
         if not age_gender_success:
             logger.error("Failed to load age & gender model")
             return False
         # Load nationality model
-        logger.info("Loading nationality model...")
         nationality_model = NationalityModel()
         nationality_success = nationality_model.load()
         if not nationality_success:
             logger.error("Failed to load nationality model")
             return False
-        logger.info("All models loaded successfully!")
         return True
     except Exception as e:
         logger.error(f"Error loading models: {e}")
@@ -59,9 +74,14 @@ async def load_models() -> bool:
 async def lifespan(app: FastAPI):
     # Startup
     logger.info("Starting FastAPI application...")
     success = await load_models()
     if not success:
         logger.error("Failed to load models. Application will not work properly.")
     yield
@@ -77,49 +97,93 @@ app = FastAPI(
 )
 def preprocess_audio(audio_data: np.ndarray, sr: int) -> tuple[np.ndarray, int]:
     if len(audio_data.shape) > 1:
         audio_data = librosa.to_mono(audio_data)
     if sr != SAMPLING_RATE:
-        logger.info(f"Resampling from {sr}Hz to {SAMPLING_RATE}Hz")
         audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=SAMPLING_RATE)
     audio_data = audio_data.astype(np.float32)
     return audio_data, SAMPLING_RATE
 async def process_audio_file(file: UploadFile) -> tuple[np.ndarray, int]:
     if not file.filename:
         raise HTTPException(status_code=400, detail="No file selected")
     if not allowed_file(file.filename):
         raise HTTPException(status_code=400, detail="Invalid file type. Allowed: wav, mp3, flac, m4a")
     # Create a secure filename
-    filename = f"temp_{file.filename}"
     filepath = os.path.join(UPLOAD_FOLDER, filename)
     try:
         # Save uploaded file temporarily
         with open(filepath, "wb") as buffer:
             content = await file.read()
             buffer.write(content)
         # Load and preprocess audio
         audio_data, sr = librosa.load(filepath, sr=None)
         processed_audio, processed_sr = preprocess_audio(audio_data, sr)
         return processed_audio, processed_sr
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error processing audio file: {str(e)}")
     finally:
         # Clean up temporary file
         if os.path.exists(filepath):
             os.remove(filepath)
 @app.get("/")
 async def root() -> Dict[str, Any]:
     return {
         "message": "Audio Analysis API - Age, Gender & Nationality Prediction",
         "models_loaded": {
@@ -137,79 +201,151 @@ async def root() -> Dict[str, Any]:
 @app.get("/health")
 async def health_check() -> Dict[str, str]:
     return {"status": "healthy"}
 @app.post("/predict_age_and_gender")
 async def predict_age_and_gender(file: UploadFile = File(...)) -> Dict[str, Any]:
     """Predict age and gender from uploaded audio file."""
     if age_gender_model is None or not hasattr(age_gender_model, 'model') or age_gender_model.model is None:
         raise HTTPException(status_code=500, detail="Age & gender model not loaded")
     try:
         processed_audio, processed_sr = await process_audio_file(file)
         predictions = age_gender_model.predict(processed_audio, processed_sr)
         return {
             "success": True,
-            "predictions": predictions
         }
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/predict_nationality")
 async def predict_nationality(file: UploadFile = File(...)) -> Dict[str, Any]:
     """Predict nationality/language from uploaded audio file."""
     if nationality_model is None or not hasattr(nationality_model, 'model') or nationality_model.model is None:
         raise HTTPException(status_code=500, detail="Nationality model not loaded")
     try:
         processed_audio, processed_sr = await process_audio_file(file)
         predictions = nationality_model.predict(processed_audio, processed_sr)
         return {
             "success": True,
-            "predictions": predictions
         }
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/predict_all")
 async def predict_all(file: UploadFile = File(...)) -> Dict[str, Any]:
     if age_gender_model is None or not hasattr(age_gender_model, 'model') or age_gender_model.model is None:
         raise HTTPException(status_code=500, detail="Age & gender model not loaded")
     if nationality_model is None or not hasattr(nationality_model, 'model') or nationality_model.model is None:
         raise HTTPException(status_code=500, detail="Nationality model not loaded")
     try:
         processed_audio, processed_sr = await process_audio_file(file)
-        # Get both predictions
         age_gender_predictions = age_gender_model.predict(processed_audio, processed_sr)
         nationality_predictions = nationality_model.predict(processed_audio, processed_sr)
         return {
             "success": True,
             "predictions": {
                 "demographics": age_gender_predictions,
                 "nationality": nationality_predictions
             }
         }
     except HTTPException:
         raise
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
     port = int(os.environ.get("PORT", 7860))
     uvicorn.run(
         "app:app",
         host="0.0.0.0",

 import librosa
 from typing import Dict, Any
 import logging
+import time
 from contextlib import asynccontextmanager
 from models.nationality_model import NationalityModel
 from models.age_and_gender_model import AgeGenderModel
+# Configure logging with more detailed format
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
 logger = logging.getLogger(__name__)
 UPLOAD_FOLDER = 'uploads'
     global age_gender_model, nationality_model
     try:
+        total_start_time = time.time()
         # Load age & gender model
+        logger.info("Starting age & gender model loading...")
+        age_start = time.time()
         age_gender_model = AgeGenderModel()
         age_gender_success = age_gender_model.load()
+        age_end = time.time()
         if not age_gender_success:
             logger.error("Failed to load age & gender model")
             return False
+        logger.info(f"Age & gender model loaded successfully in {age_end - age_start:.2f} seconds")
         # Load nationality model
+        logger.info("Starting nationality model loading...")
+        nationality_start = time.time()
         nationality_model = NationalityModel()
         nationality_success = nationality_model.load()
+        nationality_end = time.time()
         if not nationality_success:
             logger.error("Failed to load nationality model")
             return False
+        logger.info(f"Nationality model loaded successfully in {nationality_end - nationality_start:.2f} seconds")
+        total_end = time.time()
+        logger.info(f"All models loaded successfully! Total time: {total_end - total_start_time:.2f} seconds")
         return True
     except Exception as e:
         logger.error(f"Error loading models: {e}")
 async def lifespan(app: FastAPI):
     # Startup
     logger.info("Starting FastAPI application...")
+    startup_start = time.time()
     success = await load_models()
+    startup_end = time.time()
     if not success:
         logger.error("Failed to load models. Application will not work properly.")
+    else:
+        logger.info(f"FastAPI application started successfully in {startup_end - startup_start:.2f} seconds")
     yield
 )
 def preprocess_audio(audio_data: np.ndarray, sr: int) -> tuple[np.ndarray, int]:
+    preprocess_start = time.time()
+    original_shape = audio_data.shape
+    logger.info(f"Starting audio preprocessing - Original shape: {original_shape}, Sample rate: {sr}Hz")
+    # Convert to mono if stereo
     if len(audio_data.shape) > 1:
+        mono_start = time.time()
         audio_data = librosa.to_mono(audio_data)
+        mono_end = time.time()
+        logger.info(f"Converted stereo to mono in {mono_end - mono_start:.3f} seconds - New shape: {audio_data.shape}")
+    # Resample if needed
     if sr != SAMPLING_RATE:
+        resample_start = time.time()
+        logger.info(f"Resampling from {sr}Hz to {SAMPLING_RATE}Hz...")
         audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=SAMPLING_RATE)
+        resample_end = time.time()
+        logger.info(f"Resampling completed in {resample_end - resample_start:.3f} seconds")
+    else:
+        logger.info(f"No resampling needed - already at {SAMPLING_RATE}Hz")
+    # Convert to float32
     audio_data = audio_data.astype(np.float32)
+    preprocess_end = time.time()
+    duration_seconds = len(audio_data) / SAMPLING_RATE
+    logger.info(f"Audio preprocessing completed in {preprocess_end - preprocess_start:.3f} seconds")
+    logger.info(f"Final audio: {audio_data.shape} samples, {duration_seconds:.2f} seconds duration")
     return audio_data, SAMPLING_RATE
 async def process_audio_file(file: UploadFile) -> tuple[np.ndarray, int]:
+    process_start = time.time()
+    logger.info(f"Processing uploaded file: {file.filename}")
     if not file.filename:
         raise HTTPException(status_code=400, detail="No file selected")
     if not allowed_file(file.filename):
+        logger.warning(f"Invalid file type uploaded: {file.filename}")
         raise HTTPException(status_code=400, detail="Invalid file type. Allowed: wav, mp3, flac, m4a")
+    # Get file extension and log it
+    file_ext = file.filename.rsplit('.', 1)[1].lower()
+    logger.info(f"Processing {file_ext.upper()} file: {file.filename}")
     # Create a secure filename
+    filename = f"temp_{int(time.time())}_{file.filename}"
     filepath = os.path.join(UPLOAD_FOLDER, filename)
     try:
         # Save uploaded file temporarily
+        save_start = time.time()
         with open(filepath, "wb") as buffer:
             content = await file.read()
             buffer.write(content)
+        save_end = time.time()
+        file_size_mb = len(content) / (1024 * 1024)
+        logger.info(f"File saved ({file_size_mb:.2f} MB) in {save_end - save_start:.3f} seconds")
         # Load and preprocess audio
+        load_start = time.time()
+        logger.info(f"Loading audio from {filepath}...")
         audio_data, sr = librosa.load(filepath, sr=None)
+        load_end = time.time()
+        logger.info(f"Audio loaded in {load_end - load_start:.3f} seconds")
         processed_audio, processed_sr = preprocess_audio(audio_data, sr)
+        process_end = time.time()
+        logger.info(f"Total file processing completed in {process_end - process_start:.3f} seconds")
         return processed_audio, processed_sr
     except Exception as e:
+        logger.error(f"Error processing audio file {file.filename}: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Error processing audio file: {str(e)}")
     finally:
         # Clean up temporary file
         if os.path.exists(filepath):
             os.remove(filepath)
+            logger.info(f"Temporary file {filename} cleaned up")
 @app.get("/")
 async def root() -> Dict[str, Any]:
+    logger.info("Root endpoint accessed")
     return {
         "message": "Audio Analysis API - Age, Gender & Nationality Prediction",
         "models_loaded": {
 @app.get("/health")
 async def health_check() -> Dict[str, str]:
+    logger.info("Health check endpoint accessed")
     return {"status": "healthy"}
 @app.post("/predict_age_and_gender")
 async def predict_age_and_gender(file: UploadFile = File(...)) -> Dict[str, Any]:
     """Predict age and gender from uploaded audio file."""
+    endpoint_start = time.time()
+    logger.info(f"Age & Gender prediction requested for file: {file.filename}")
     if age_gender_model is None or not hasattr(age_gender_model, 'model') or age_gender_model.model is None:
+        logger.error("Age & gender model not loaded - returning 500 error")
         raise HTTPException(status_code=500, detail="Age & gender model not loaded")
     try:
         processed_audio, processed_sr = await process_audio_file(file)
+        # Make prediction
+        prediction_start = time.time()
+        logger.info("Starting age & gender prediction...")
         predictions = age_gender_model.predict(processed_audio, processed_sr)
+        prediction_end = time.time()
+        logger.info(f"Age & gender prediction completed in {prediction_end - prediction_start:.3f} seconds")
+        logger.info(f"Predicted age: {predictions['age']['predicted_age']:.1f} years")
+        logger.info(f"Predicted gender: {predictions['gender']['predicted_gender']} (confidence: {predictions['gender']['confidence']:.3f})")
+        endpoint_end = time.time()
+        logger.info(f"Total age & gender endpoint processing time: {endpoint_end - endpoint_start:.3f} seconds")
         return {
             "success": True,
+            "predictions": predictions,
+            "processing_time": round(endpoint_end - endpoint_start, 3)
         }
     except HTTPException:
         raise
     except Exception as e:
+        logger.error(f"Error in age & gender prediction: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/predict_nationality")
 async def predict_nationality(file: UploadFile = File(...)) -> Dict[str, Any]:
     """Predict nationality/language from uploaded audio file."""
+    endpoint_start = time.time()
+    logger.info(f"Nationality prediction requested for file: {file.filename}")
     if nationality_model is None or not hasattr(nationality_model, 'model') or nationality_model.model is None:
+        logger.error("Nationality model not loaded - returning 500 error")
         raise HTTPException(status_code=500, detail="Nationality model not loaded")
     try:
         processed_audio, processed_sr = await process_audio_file(file)
+        # Make prediction
+        prediction_start = time.time()
+        logger.info("Starting nationality prediction...")
         predictions = nationality_model.predict(processed_audio, processed_sr)
+        prediction_end = time.time()
+        logger.info(f"Nationality prediction completed in {prediction_end - prediction_start:.3f} seconds")
+        logger.info(f"Predicted language: {predictions['predicted_language']} (confidence: {predictions['confidence']:.3f})")
+        logger.info(f"Top 3 languages: {[lang['language_code'] for lang in predictions['top_languages'][:3]]}")
+        endpoint_end = time.time()
+        logger.info(f"Total nationality endpoint processing time: {endpoint_end - endpoint_start:.3f} seconds")
         return {
             "success": True,
+            "predictions": predictions,
+            "processing_time": round(endpoint_end - endpoint_start, 3)
         }
     except HTTPException:
         raise
     except Exception as e:
+        logger.error(f"Error in nationality prediction: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/predict_all")
 async def predict_all(file: UploadFile = File(...)) -> Dict[str, Any]:
+    """Predict age, gender, and nationality from uploaded audio file."""
+    endpoint_start = time.time()
+    logger.info(f"Complete analysis requested for file: {file.filename}")
     if age_gender_model is None or not hasattr(age_gender_model, 'model') or age_gender_model.model is None:
+        logger.error("Age & gender model not loaded - returning 500 error")
         raise HTTPException(status_code=500, detail="Age & gender model not loaded")
     if nationality_model is None or not hasattr(nationality_model, 'model') or nationality_model.model is None:
+        logger.error("Nationality model not loaded - returning 500 error")
         raise HTTPException(status_code=500, detail="Nationality model not loaded")
     try:
         processed_audio, processed_sr = await process_audio_file(file)
+        # Get age & gender predictions
+        age_prediction_start = time.time()
+        logger.info("Starting age & gender prediction for complete analysis...")
         age_gender_predictions = age_gender_model.predict(processed_audio, processed_sr)
+        age_prediction_end = time.time()
+        logger.info(f"Age & gender prediction completed in {age_prediction_end - age_prediction_start:.3f} seconds")
+        # Get nationality predictions
+        nationality_prediction_start = time.time()
+        logger.info("Starting nationality prediction for complete analysis...")
         nationality_predictions = nationality_model.predict(processed_audio, processed_sr)
+        nationality_prediction_end = time.time()
+        logger.info(f"Nationality prediction completed in {nationality_prediction_end - nationality_prediction_start:.3f} seconds")
+        # Log combined results
+        logger.info(f"Complete analysis results:")
+        logger.info(f"  - Age: {age_gender_predictions['age']['predicted_age']:.1f} years")
+        logger.info(f"  - Gender: {age_gender_predictions['gender']['predicted_gender']} (confidence: {age_gender_predictions['gender']['confidence']:.3f})")
+        logger.info(f"  - Language: {nationality_predictions['predicted_language']} (confidence: {nationality_predictions['confidence']:.3f})")
+        total_prediction_time = (age_prediction_end - age_prediction_start) + (nationality_prediction_end - nationality_prediction_start)
+        endpoint_end = time.time()
+        logger.info(f"Total prediction time: {total_prediction_time:.3f} seconds")
+        logger.info(f"Total complete analysis endpoint processing time: {endpoint_end - endpoint_start:.3f} seconds")
         return {
             "success": True,
             "predictions": {
                 "demographics": age_gender_predictions,
                 "nationality": nationality_predictions
+            },
+            "processing_time": {
+                "total": round(endpoint_end - endpoint_start, 3),
+                "age_gender": round(age_prediction_end - age_prediction_start, 3),
+                "nationality": round(nationality_prediction_end - nationality_prediction_start, 3)
             }
         }
     except HTTPException:
         raise
     except Exception as e:
+        logger.error(f"Error in complete analysis: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
     port = int(os.environ.get("PORT", 7860))
+    logger.info(f"Starting server on port {port}")
     uvicorn.run(
         "app:app",
         host="0.0.0.0",

models/age_and_gender_model.py CHANGED Viewed

@@ -6,12 +6,22 @@ import audinterface
 import librosa
 class AgeGenderModel:
-    def __init__(self, model_path="./cache/age_and_gender"):
-        self.model_path = model_path
         self.model = None
         self.interface = None
         self.sampling_rate = 16000
-        os.makedirs(model_path, exist_ok=True)
     def download_model(self):
         model_onnx = os.path.join(self.model_path, 'model.onnx')
@@ -24,7 +34,12 @@ class AgeGenderModel:
         print("Age & gender model files not found. Downloading...")
         try:
-            cache_root = 'cache'
             audeer.mkdir(cache_root)
             audeer.mkdir(self.model_path)
@@ -63,7 +78,7 @@ class AgeGenderModel:
                 return False
             # Load the audonnx model
-            print("Loading age & gender model...")
             self.model = audonnx.load(self.model_path)
             # Create the audinterface Feature interface

 import librosa
 class AgeGenderModel:
+    def __init__(self, model_path=None):
+        # Use persistent storage if available, fallback to local cache
+        if model_path is None:
+            if os.path.exists("/data"):
+                # HF Spaces persistent storage
+                self.model_path = "/data/age_and_gender"
+            else:
+                # Local development or other platforms
+                self.model_path = "./cache/age_and_gender"
+        else:
+            self.model_path = model_path
         self.model = None
         self.interface = None
         self.sampling_rate = 16000
+        os.makedirs(self.model_path, exist_ok=True)
     def download_model(self):
         model_onnx = os.path.join(self.model_path, 'model.onnx')
         print("Age & gender model files not found. Downloading...")
         try:
+            # Use /data for cache if available, otherwise use local cache
+            if os.path.exists("/data"):
+                cache_root = '/data/cache'
+            else:
+                cache_root = 'cache'
             audeer.mkdir(cache_root)
             audeer.mkdir(self.model_path)
                 return False
             # Load the audonnx model
+            print(f"Loading age & gender model from {self.model_path}...")
             self.model = audonnx.load(self.model_path)
             # Create the audinterface Feature interface

models/nationality_model.py CHANGED Viewed

@@ -8,17 +8,35 @@ MODEL_ID = "facebook/mms-lid-256"
 SAMPLING_RATE = 16000
 class NationalityModel:
-    def __init__(self, cache_dir="./cache/nationality"):
         self.processor = None
         self.model = None
-        self.cache_dir = cache_dir
-        os.makedirs(cache_dir, exist_ok=True)
     def load(self):
         try:
             print(f"Loading nationality prediction model from {MODEL_ID}...")
-            self.processor = AutoFeatureExtractor.from_pretrained(MODEL_ID, cache_dir=self.cache_dir)
-            self.model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID, cache_dir=self.cache_dir)
             print("Nationality prediction model loaded successfully!")
             return True
         except Exception as e:
@@ -70,4 +88,4 @@ class NationalityModel:
             }
         except Exception as e:
-            raise Exception(f"Nationality prediction error: {str(e)}")

 SAMPLING_RATE = 16000
 class NationalityModel:
+    def __init__(self, cache_dir=None):
+        # Use persistent storage if available, fallback to local cache
+        if cache_dir is None:
+            if os.path.exists("/data"):
+                # HF Spaces persistent storage
+                self.cache_dir = "/data/nationality"
+            else:
+                # Local development or other platforms
+                self.cache_dir = "./cache/nationality"
+        else:
+            self.cache_dir = cache_dir
         self.processor = None
         self.model = None
+        os.makedirs(self.cache_dir, exist_ok=True)
     def load(self):
         try:
             print(f"Loading nationality prediction model from {MODEL_ID}...")
+            print(f"Using cache directory: {self.cache_dir}")
+            self.processor = AutoFeatureExtractor.from_pretrained(
+                MODEL_ID,
+                cache_dir=self.cache_dir
+            )
+            self.model = Wav2Vec2ForSequenceClassification.from_pretrained(
+                MODEL_ID,
+                cache_dir=self.cache_dir
+            )
             print("Nationality prediction model loaded successfully!")
             return True
         except Exception as e:
             }
         except Exception as e:
+            raise Exception(f"Nationality prediction error: {str(e)}")