Update app.py
Browse files
app.py
CHANGED
@@ -8,24 +8,43 @@ from threading import Semaphore
|
|
8 |
import os
|
9 |
from werkzeug.utils import secure_filename
|
10 |
import tempfile
|
11 |
-
from moviepy.editor import VideoFileClip
|
|
|
|
|
12 |
|
13 |
app = Flask(__name__)
|
14 |
|
15 |
# Configuration
|
16 |
-
MAX_CONCURRENT_REQUESTS = 2
|
17 |
-
MAX_FILE_DURATION = 60 * 30
|
18 |
TEMPORARY_FOLDER = tempfile.gettempdir()
|
19 |
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
|
20 |
ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
|
21 |
ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
# Device check for faster-whisper
|
24 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
25 |
compute_type = "float16" if device == "cuda" else "int8"
|
26 |
print(f"Using device: {device} with compute_type: {compute_type}")
|
27 |
|
28 |
-
# Faster Whisper setup
|
29 |
beamsize = 2
|
30 |
wmodel = WhisperModel(
|
31 |
"guillaumekln/faster-whisper-small",
|
@@ -43,7 +62,6 @@ def allowed_file(filename):
|
|
43 |
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
|
44 |
|
45 |
def cleanup_temp_files(*file_paths):
|
46 |
-
"""Ensure temporary files are deleted after processing"""
|
47 |
for file_path in file_paths:
|
48 |
try:
|
49 |
if file_path and os.path.exists(file_path):
|
@@ -52,7 +70,6 @@ def cleanup_temp_files(*file_paths):
|
|
52 |
print(f"Error cleaning up temp file {file_path}: {str(e)}")
|
53 |
|
54 |
def extract_audio_from_video(video_path, output_audio_path):
|
55 |
-
"""Extract audio from a video file and save it as a temporary audio file"""
|
56 |
try:
|
57 |
video = VideoFileClip(video_path)
|
58 |
if video.duration > MAX_FILE_DURATION:
|
@@ -64,9 +81,26 @@ def extract_audio_from_video(video_path, output_audio_path):
|
|
64 |
except Exception as e:
|
65 |
raise Exception(f"Failed to extract audio from video: {str(e)}")
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
@app.route("/health", methods=["GET"])
|
68 |
def health_check():
|
69 |
-
"""Endpoint to check if API is running"""
|
70 |
return jsonify({
|
71 |
'status': 'API is running',
|
72 |
'timestamp': datetime.datetime.now().isoformat(),
|
@@ -79,7 +113,6 @@ def health_check():
|
|
79 |
|
80 |
@app.route("/status/busy", methods=["GET"])
|
81 |
def server_busy():
|
82 |
-
"""Endpoint to check if server is busy"""
|
83 |
is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
|
84 |
return jsonify({
|
85 |
'is_busy': is_busy,
|
@@ -100,18 +133,19 @@ def transcribe():
|
|
100 |
temp_audio_path = None
|
101 |
|
102 |
try:
|
103 |
-
if 'file' not in request.files:
|
104 |
-
return jsonify({'error': '
|
105 |
|
106 |
file = request.files['file']
|
|
|
107 |
if not (file and allowed_file(file.filename)):
|
108 |
return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
|
109 |
|
110 |
-
# Save uploaded file
|
111 |
temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
|
112 |
file.save(temp_file_path)
|
113 |
|
114 |
-
#
|
115 |
file_extension = file.filename.rsplit('.', 1)[1].lower()
|
116 |
if file_extension in ALLOWED_VIDEO_EXTENSIONS:
|
117 |
temp_audio_path = os.path.join(TEMPORARY_FOLDER, f"temp_audio_{int(time.time())}.wav")
|
@@ -120,7 +154,7 @@ def transcribe():
|
|
120 |
else:
|
121 |
transcription_file = temp_file_path
|
122 |
|
123 |
-
# Transcribe
|
124 |
segments, _ = wmodel.transcribe(
|
125 |
transcription_file,
|
126 |
beam_size=beamsize,
|
@@ -131,9 +165,14 @@ def transcribe():
|
|
131 |
)
|
132 |
|
133 |
full_text = " ".join(segment.text for segment in segments)
|
|
|
|
|
|
|
|
|
|
|
134 |
return jsonify({
|
135 |
'transcription': full_text,
|
136 |
-
'file_type':
|
137 |
}), 200
|
138 |
|
139 |
except Exception as e:
|
@@ -146,7 +185,6 @@ def transcribe():
|
|
146 |
print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")
|
147 |
|
148 |
if __name__ == "__main__":
|
149 |
-
# Create temporary folder if it doesn't exist
|
150 |
if not os.path.exists(TEMPORARY_FOLDER):
|
151 |
os.makedirs(TEMPORARY_FOLDER)
|
152 |
|
|
|
8 |
import os
|
9 |
from werkzeug.utils import secure_filename
|
10 |
import tempfile
|
11 |
+
from moviepy.editor import VideoFileClip
|
12 |
+
import firebase_admin
|
13 |
+
from firebase_admin import credentials, messaging # Added for FCM
|
14 |
|
15 |
app = Flask(__name__)
|
16 |
|
17 |
# Configuration
|
18 |
+
MAX_CONCURRENT_REQUESTS = 2
|
19 |
+
MAX_FILE_DURATION = 60 * 30
|
20 |
TEMPORARY_FOLDER = tempfile.gettempdir()
|
21 |
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
|
22 |
ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
|
23 |
ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)
|
24 |
|
25 |
+
|
26 |
+
# Initialize the Firebase Admin SDK from environment variables so no
# service-account JSON file has to ship with the deployment.
_REQUIRED_FIREBASE_ENV_VARS = (
    "FIREBASE_PROJECT_ID",
    "FIREBASE_PRIVATE_KEY_ID",
    "FIREBASE_PRIVATE_KEY",
    "FIREBASE_CLIENT_EMAIL",
    "FIREBASE_CLIENT_ID",
)
_missing_vars = [name for name in _REQUIRED_FIREBASE_ENV_VARS if not os.getenv(name)]
if _missing_vars:
    # Fail fast with a readable message instead of the opaque
    # AttributeError that None.replace() would raise below.
    raise RuntimeError(
        f"Missing Firebase environment variables: {', '.join(_missing_vars)}"
    )

firebase_credentials = {
    "type": "service_account",
    "project_id": os.getenv("FIREBASE_PROJECT_ID"),
    "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"),
    # Env vars commonly store the PEM key with literal "\n" sequences;
    # restore real newlines so the key parses.
    "private_key": os.getenv("FIREBASE_PRIVATE_KEY").replace("\\n", "\n"),
    "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"),
    "client_id": os.getenv("FIREBASE_CLIENT_ID"),
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": f"https://www.googleapis.com/robot/v1/metadata/x509/{os.getenv('FIREBASE_CLIENT_EMAIL')}",
}
cred = credentials.Certificate(firebase_credentials)
firebase_admin.initialize_app(cred)
|
41 |
+
|
42 |
# Device check for faster-whisper
|
43 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
44 |
compute_type = "float16" if device == "cuda" else "int8"
|
45 |
print(f"Using device: {device} with compute_type: {compute_type}")
|
46 |
|
47 |
+
# Faster Whisper setup
|
48 |
beamsize = 2
|
49 |
wmodel = WhisperModel(
|
50 |
"guillaumekln/faster-whisper-small",
|
|
|
62 |
filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
|
63 |
|
64 |
def cleanup_temp_files(*file_paths):
|
|
|
65 |
for file_path in file_paths:
|
66 |
try:
|
67 |
if file_path and os.path.exists(file_path):
|
|
|
70 |
print(f"Error cleaning up temp file {file_path}: {str(e)}")
|
71 |
|
72 |
def extract_audio_from_video(video_path, output_audio_path):
|
|
|
73 |
try:
|
74 |
video = VideoFileClip(video_path)
|
75 |
if video.duration > MAX_FILE_DURATION:
|
|
|
81 |
except Exception as e:
|
82 |
raise Exception(f"Failed to extract audio from video: {str(e)}")
|
83 |
|
84 |
+
def send_fcm_data_message(fcm_token, transcription, file_type):
    """Send a silent FCM data message with transcription details"""
    # Data-only payload (no notification key) so the client app handles it
    # silently; FCM requires all data values to be strings.
    payload = {
        'transcription': transcription,
        'file_type': file_type,
        'timestamp': datetime.datetime.now().isoformat(),
    }
    try:
        fcm_message = messaging.Message(data=payload, token=fcm_token)
        response = messaging.send(fcm_message)
    except Exception as e:
        # Best-effort delivery: log and report failure rather than
        # propagating, so transcription still returns to the caller.
        print(f"Error sending FCM message: {str(e)}")
        return False
    print(f"FCM message sent: {response}")
    return True
|
101 |
+
|
102 |
@app.route("/health", methods=["GET"])
|
103 |
def health_check():
|
|
|
104 |
return jsonify({
|
105 |
'status': 'API is running',
|
106 |
'timestamp': datetime.datetime.now().isoformat(),
|
|
|
113 |
|
114 |
@app.route("/status/busy", methods=["GET"])
|
115 |
def server_busy():
|
|
|
116 |
is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
|
117 |
return jsonify({
|
118 |
'is_busy': is_busy,
|
|
|
133 |
temp_audio_path = None
|
134 |
|
135 |
try:
|
136 |
+
if 'file' not in request.files or 'fcm_token' not in request.form:
|
137 |
+
return jsonify({'error': 'Missing file or FCM token'}), 400
|
138 |
|
139 |
file = request.files['file']
|
140 |
+
fcm_token = request.form['fcm_token']
|
141 |
if not (file and allowed_file(file.filename)):
|
142 |
return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
|
143 |
|
144 |
+
# Save uploaded file
|
145 |
temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
|
146 |
file.save(temp_file_path)
|
147 |
|
148 |
+
# Handle video/audio
|
149 |
file_extension = file.filename.rsplit('.', 1)[1].lower()
|
150 |
if file_extension in ALLOWED_VIDEO_EXTENSIONS:
|
151 |
temp_audio_path = os.path.join(TEMPORARY_FOLDER, f"temp_audio_{int(time.time())}.wav")
|
|
|
154 |
else:
|
155 |
transcription_file = temp_file_path
|
156 |
|
157 |
+
# Transcribe
|
158 |
segments, _ = wmodel.transcribe(
|
159 |
transcription_file,
|
160 |
beam_size=beamsize,
|
|
|
165 |
)
|
166 |
|
167 |
full_text = " ".join(segment.text for segment in segments)
|
168 |
+
file_type = 'video' if file_extension in ALLOWED_VIDEO_EXTENSIONS else 'audio'
|
169 |
+
|
170 |
+
# Send FCM data message
|
171 |
+
send_fcm_data_message(fcm_token, full_text, file_type)
|
172 |
+
|
173 |
return jsonify({
|
174 |
'transcription': full_text,
|
175 |
+
'file_type': file_type
|
176 |
}), 200
|
177 |
|
178 |
except Exception as e:
|
|
|
185 |
print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")
|
186 |
|
187 |
if __name__ == "__main__":
|
|
|
188 |
if not os.path.exists(TEMPORARY_FOLDER):
|
189 |
os.makedirs(TEMPORARY_FOLDER)
|
190 |
|