"""Loaders for the Whisper, pyannote diarization, summarization and spaCy models.

Every loader logs its progress and, on failure, logs the error and returns
``None`` instead of raising, so callers must check the result before use.
"""

import logging

from faster_whisper import WhisperModel
import spacy
from transformers import pipeline
import torch
from pyannote.audio import Pipeline

logger = logging.getLogger(__name__)

# Process-wide cache for the diarization pipeline; filled by load_diarization().
_diarize_model = None

# Name of the spaCy pipeline package loaded (and, if missing, downloaded).
_SPACY_MODEL_NAME = "en_core_web_sm"


def load_whisper(config):
    """Load the faster-whisper model described by *config*.

    Args:
        config: Object exposing ``WHISPER_MODEL_NAME``, ``WHISPER_DEVICE``,
            ``WHISPER_COMPUTE_TYPE`` and ``CPU_THREADS``.

    Returns:
        The loaded ``WhisperModel``, or ``None`` if loading failed.
    """
    logger.info("Loading Whisper model...")
    try:
        model = WhisperModel(
            config.WHISPER_MODEL_NAME,
            device=config.WHISPER_DEVICE,
            compute_type=config.WHISPER_COMPUTE_TYPE,
            cpu_threads=config.CPU_THREADS,
        )
        logger.info(
            "Whisper model '%s' loaded on %s.",
            config.WHISPER_MODEL_NAME,
            config.WHISPER_DEVICE,
        )
        return model
    except Exception as e:
        logger.error("Failed to load Whisper model: %s", e, exc_info=True)
        return None


def load_diarization(config):
    """Return the cached pyannote diarization pipeline, loading it if needed.

    The pipeline is only loaded when no cached instance exists and *config*
    has a ``PYANNOTE_AUTH_TOKEN`` attribute. It is moved to the GPU when
    ``config.WHISPER_DEVICE`` is ``"cuda"`` and CUDA is available.

    Args:
        config: Object optionally exposing ``PYANNOTE_AUTH_TOKEN`` and
            ``WHISPER_DEVICE``.

    Returns:
        The diarization pipeline, or ``None`` if it is not loaded (no token
        on *config*, or loading failed).
    """
    global _diarize_model
    logger.info("Loading PYANNOTE model...")

    if _diarize_model is not None or not hasattr(config, "PYANNOTE_AUTH_TOKEN"):
        return _diarize_model

    try:
        logger.info("Loading diarization model")
        diarizer = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.0",
            use_auth_token=config.PYANNOTE_AUTH_TOKEN,
        )
        # from_pretrained can hand back None instead of raising (e.g. when
        # the model is gated or the token is rejected); report that as a
        # failure rather than logging a bogus success.
        if diarizer is None:
            logger.error(
                "Failed to load diarization model: Pipeline.from_pretrained "
                "returned None (check the auth token and model access)."
            )
            return None

        # Cache before the device move so a failed move still leaves a
        # usable CPU pipeline behind.
        _diarize_model = diarizer

        # Move to GPU if available
        wants_cuda = getattr(config, "WHISPER_DEVICE", None) == "cuda"
        if wants_cuda and torch.cuda.is_available():
            _diarize_model = _diarize_model.to(torch.device("cuda"))
        logger.info("Diarization model loaded successfully")
    except Exception as e:
        logger.error("Failed to load diarization model: %s", e, exc_info=True)
    return _diarize_model


def load_summarizer(config):
    """Build the ``text2text-generation`` pipeline used for summarization.

    The pipeline runs on GPU 0 when CUDA is available, otherwise on CPU.

    Args:
        config: Object exposing ``SUMMARIZER_MODEL``.

    Returns:
        The transformers pipeline, or ``None`` if loading failed.
    """
    logger.info("Loading Summarization pipeline...")
    try:
        summarizer = pipeline(
            "text2text-generation",
            model=config.SUMMARIZER_MODEL,
            device=0 if torch.cuda.is_available() else -1,
        )
        logger.info("Summarization pipeline loaded.")
        return summarizer
    except Exception as e:
        logger.error(
            "Failed to load Summarization pipeline: %s", e, exc_info=True
        )
        return None


def load_spacy(config):
    """Load the ``en_core_web_sm`` spaCy model, downloading it if missing.

    Args:
        config: Unused; accepted so all loaders share the same signature.

    Returns:
        The loaded spaCy pipeline, or ``None`` if it could not be loaded or
        downloaded.
    """
    logger.info("Loading spaCy model...")
    try:
        nlp = spacy.load(_SPACY_MODEL_NAME)
        logger.info("spaCy model '%s' loaded.", _SPACY_MODEL_NAME)
        return nlp
    except OSError:
        logger.warning(
            "spaCy model '%s' not found. Trying to download...",
            _SPACY_MODEL_NAME,
        )
        try:
            spacy.cli.download(_SPACY_MODEL_NAME)
            nlp = spacy.load(_SPACY_MODEL_NAME)
            logger.info(
                "spaCy model '%s' downloaded and loaded.", _SPACY_MODEL_NAME
            )
            return nlp
        except (Exception, SystemExit) as download_e:
            # The spaCy CLI reports download failures via sys.exit();
            # SystemExit is not an Exception subclass, so it must be caught
            # explicitly or a failed download would terminate the process.
            logger.error(
                "Failed to download or load spaCy model '%s': %s",
                _SPACY_MODEL_NAME,
                download_e,
                exc_info=True,
            )
            return None
    except Exception as e:
        logger.error("Failed to load spaCy model: %s", e, exc_info=True)
        return None