import io
import time
import datetime
from threading import Semaphore
import os
import tempfile

from flask import Flask, request, jsonify, Response
from faster_whisper import WhisperModel
import torch
from werkzeug.utils import secure_filename
from moviepy.editor import VideoFileClip
import firebase_admin
from firebase_admin import credentials, messaging  # Added for FCM

app = Flask(__name__)

# Configuration
MAX_CONCURRENT_REQUESTS = 2
MAX_FILE_DURATION = 60 * 30  # seconds
TEMPORARY_FOLDER = tempfile.gettempdir()
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)

# Initialize Firebase Admin SDK using environment variables.
# Fail early with a clear message instead of an AttributeError on None when
# the private key is not configured.
_firebase_private_key = os.getenv("FIREBASE_PRIVATE_KEY")
if _firebase_private_key is None:
    raise RuntimeError("FIREBASE_PRIVATE_KEY environment variable is not set")

firebase_credentials = {
    "type": "service_account",
    "project_id": os.getenv("FIREBASE_PROJECT_ID"),
    "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"),
    # The key is stored with literal "\n" sequences; restore real newlines.
    "private_key": _firebase_private_key.replace("\\n", "\n"),
    "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"),
    "client_id": os.getenv("FIREBASE_CLIENT_ID"),
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": f"https://www.googleapis.com/robot/v1/metadata/x509/{os.getenv('FIREBASE_CLIENT_EMAIL')}",
}
cred = credentials.Certificate(firebase_credentials)
firebase_admin.initialize_app(cred)

# Device check for faster-whisper
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# Faster Whisper setup
beamsize = 2
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
    download_root="./model_cache",
)

# Concurrency control
request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
# NOTE(review): this counter is a plain module global that is read by the
# status endpoints below; any code that mutates it from several threads does
# so without a lock, so the reported value may drift under load.
active_requests = 0


def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The check is case-insensitive and only looks at the text after the last
    dot; a name without any dot is rejected.
    """
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def cleanup_temp_files(*file_paths):
    """Delete each existing path in *file_paths*, best effort.

    Falsy entries (e.g. ``None``) are skipped. Errors are printed and
    swallowed so that one failed removal does not prevent the others.
    """
    for file_path in file_paths:
        try:
            if file_path and os.path.exists(file_path):
                os.remove(file_path)
        except Exception as e:
            print(f"Error cleaning up temp file {file_path}: {str(e)}")


def extract_audio_from_video(video_path, output_audio_path):
    """Write the audio track of *video_path* to *output_audio_path*.

    Args:
        video_path: Path of the video file to read.
        output_audio_path: Path the extracted audio is written to.

    Returns:
        *output_audio_path* on success.

    Raises:
        Exception: If the clip cannot be opened, is longer than
            MAX_FILE_DURATION seconds, has no audio track, or the audio
            cannot be written. The original error is chained as the cause.
    """
    video = None
    try:
        video = VideoFileClip(video_path)
        if video.duration > MAX_FILE_DURATION:
            raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
        if video.audio is None:
            # Without this guard the failure surfaces as an opaque
            # "'NoneType' object has no attribute 'write_audiofile'".
            raise ValueError("Video has no audio track")
        video.audio.write_audiofile(output_audio_path)
        return output_audio_path
    except Exception as e:
        raise Exception(f"Failed to extract audio from video: {str(e)}") from e
    finally:
        # Always release the clip, including when writing the audio fails.
        if video is not None:
            video.close()


def send_fcm_data_message(fcm_token, transcription, file_type, created_date, transcription_name):
    """Send an FCM message with transcription details and a notification.

    Args:
        fcm_token: Registration token of the target device.
        transcription: Transcribed text placed in the data payload.
        file_type: Value stored under the ``file_type`` data key.
        created_date: Value stored under the ``created_date`` data key.
        transcription_name: Used as the notification title and stored under
            the ``transcription_name`` data key.

    Returns:
        True if the message was handed to FCM, False if sending raised.
    """
    # NOTE(review): FCM documents a 4096-byte limit on message payloads; a
    # long transcription placed in `data` may be rejected - confirm and
    # consider delivering large results another way.
    try:
        message = messaging.Message(
            notification=messaging.Notification(
                title=transcription_name,
                body="Successfully downloaded",
            ),
            data={
                'transcription': transcription,
                'file_type': file_type,
                'created_date': created_date,
                'transcription_name': transcription_name,
            },
            token=fcm_token,
        )
        response = messaging.send(message)
        print(f"FCM message sent: {response}")
        return True
    except Exception as e:
        print(f"Error sending FCM message: {str(e)}")
        return False


@app.route("/health", methods=["GET"])
def health_check():
    """Report that the API is up, with device and capacity information."""
    return jsonify({
        'status': 'API is running',
        'timestamp': datetime.datetime.now().isoformat(),
        'device': device,
        'compute_type': compute_type,
        'active_requests': active_requests,
        'max_duration_supported': MAX_FILE_DURATION,
        'supported_formats': list(ALLOWED_EXTENSIONS),
    })


@app.route("/status/busy", methods=["GET"])
def server_busy():
    """Report whether the active request count has reached MAX_CONCURRENT_REQUESTS."""
    is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
    return jsonify({
        'is_busy': is_busy,
        'active_requests': active_requests,
        'max_capacity': MAX_CONCURRENT_REQUESTS,
    })

@app.route("/whisper_transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded audio/video file and push the text via FCM.

    Expects a multipart form with a ``file`` part and the form fields
    ``fcm_token``, ``created_date`` and ``transcription_name``.

    Returns:
        200 with an empty JSON object once the transcription has been sent
        through FCM; 400 for a missing field or unsupported file type; 502
        when the FCM send fails; 503 when all request slots are in use; 500
        with the error text for any other failure.
    """
    global active_requests

    # Reject immediately rather than queueing when all slots are taken.
    if not request_semaphore.acquire(blocking=False):
        return jsonify({'error': 'Server busy'}), 503

    active_requests += 1
    start_time = time.time()
    temp_file_path = None
    temp_audio_path = None

    try:
        if 'file' not in request.files or 'fcm_token' not in request.form:
            return jsonify({'error': 'Missing file or FCM token'}), 400

        # Validate the remaining fields up front so a missing one is reported
        # as a client error (400) instead of surfacing as a 500.
        missing_fields = [
            field for field in ('created_date', 'transcription_name')
            if field not in request.form
        ]
        if missing_fields:
            return jsonify({'error': f'Missing form field(s): {", ".join(missing_fields)}'}), 400

        file = request.files['file']
        fcm_token = request.form['fcm_token']
        created_date = request.form['created_date']
        transcription_name = request.form['transcription_name']

        if not (file and allowed_file(file.filename)):
            return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400

        file_extension = file.filename.rsplit('.', 1)[1].lower()
        is_video = file_extension in ALLOWED_VIDEO_EXTENSIONS

        # Save the upload under a unique name: concurrent requests uploading
        # the same filename must not overwrite or delete each other's file.
        upload_fd, temp_file_path = tempfile.mkstemp(
            suffix=f".{file_extension}", dir=TEMPORARY_FOLDER
        )
        os.close(upload_fd)
        file.save(temp_file_path)

        # Handle video/audio
        if is_video:
            # Unique name here too; a timestamp in whole seconds collides for
            # requests that arrive within the same second.
            audio_fd, temp_audio_path = tempfile.mkstemp(
                prefix="temp_audio_", suffix=".wav", dir=TEMPORARY_FOLDER
            )
            os.close(audio_fd)
            extract_audio_from_video(temp_file_path, temp_audio_path)
            transcription_file = temp_audio_path
        else:
            transcription_file = temp_file_path

        # Transcribe
        segments, _ = wmodel.transcribe(
            transcription_file,
            beam_size=beamsize,
            vad_filter=True,
            without_timestamps=True,
            compression_ratio_threshold=2.4,
            word_timestamps=False,
        )

        full_text = " ".join(segment.text for segment in segments)
        file_type = 'video' if is_video else 'audio'

        # Send FCM data message; the response body carries no transcription,
        # so a failed send must be reported instead of returning 200.
        delivered = send_fcm_data_message(
            fcm_token, full_text, file_type, created_date, transcription_name
        )
        if not delivered:
            return jsonify({'error': 'Failed to deliver transcription via FCM'}), 502

        return jsonify({}), 200

    except Exception as e:
        return jsonify({'error': str(e)}), 500

    finally:
        cleanup_temp_files(temp_file_path, temp_audio_path)
        active_requests -= 1
        request_semaphore.release()
        print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")


if __name__ == "__main__":
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(TEMPORARY_FOLDER, exist_ok=True)
    app.run(host="0.0.0.0", port=7860, threaded=True)