from flask import Flask, request, jsonify, Response
from faster_whisper import WhisperModel
import torch
import io
import time
import datetime
from threading import Semaphore
import os
from werkzeug.utils import secure_filename
import tempfile
from moviepy.editor import VideoFileClip # Added for video processing
app = Flask(__name__)

# Configuration
MAX_CONCURRENT_REQUESTS = 2  # Adjust based on server capacity
MAX_FILE_DURATION = 60 * 30  # 30 minutes maximum duration (adjust as needed)
TEMPORARY_FOLDER = tempfile.gettempdir()
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)

# Device check for faster-whisper
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# Faster Whisper setup with optimized parameters for long audio
beamsize = 2
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
    download_root="./model_cache"
)
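# Note: the "small" checkpoint trades accuracy for speed; a larger faster-whisper
# checkpoint can be substituted above if the hardware allows (assumption: other
# model sizes follow the same naming scheme on the Hugging Face Hub).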
# Concurrency control
request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
active_requests = 0
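# Note: active_requests is only a best-effort counter surfaced by the status
# endpoints; the semaphore above is what actually limits concurrent transcriptions.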

def allowed_file(filename):
    """Return True if the filename has an extension in ALLOWED_EXTENSIONS."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

def cleanup_temp_files(*file_paths):
    """Ensure temporary files are deleted after processing"""
    for file_path in file_paths:
        try:
            if file_path and os.path.exists(file_path):
                os.remove(file_path)
        except Exception as e:
            print(f"Error cleaning up temp file {file_path}: {str(e)}")

def extract_audio_from_video(video_path, output_audio_path):
    """Extract audio from a video file and save it as a temporary audio file"""
    try:
        video = VideoFileClip(video_path)
        if video.duration > MAX_FILE_DURATION:
            video.close()
            raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
        video.audio.write_audiofile(output_audio_path)
        video.close()
        return output_audio_path
    except Exception as e:
        raise Exception(f"Failed to extract audio from video: {str(e)}")
@app.route("/health", methods=["GET"])
def health_check():
"""Endpoint to check if API is running"""
return jsonify({
'status': 'API is running',
'timestamp': datetime.datetime.now().isoformat(),
'device': device,
'compute_type': compute_type,
'active_requests': active_requests,
'max_duration_supported': MAX_FILE_DURATION,
'supported_formats': list(ALLOWED_EXTENSIONS)
})
@app.route("/status/busy", methods=["GET"])
def server_busy():
"""Endpoint to check if server is busy"""
is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
return jsonify({
'is_busy': is_busy,
'active_requests': active_requests,
'max_capacity': MAX_CONCURRENT_REQUESTS
})
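
# Example status check (a minimal sketch; assumes the server is running locally
# on port 7860, as configured in app.run below):
#   curl http://localhost:7860/status/busy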
@app.route("/whisper_transcribe", methods=["POST"])
def transcribe():
global active_requests
if not request_semaphore.acquire(blocking=False):
return jsonify({'error': 'Server busy'}), 503
active_requests += 1
start_time = time.time()
temp_file_path = None
temp_audio_path = None
try:
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
file = request.files['file']
if not (file and allowed_file(file.filename)):
return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
# Save uploaded file to temporary location
temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
file.save(temp_file_path)
# Check if file is a video and extract audio if necessary
file_extension = file.filename.rsplit('.', 1)[1].lower()
if file_extension in ALLOWED_VIDEO_EXTENSIONS:
temp_audio_path = os.path.join(TEMPORARY_FOLDER, f"temp_audio_{int(time.time())}.wav")
extract_audio_from_video(temp_file_path, temp_audio_path)
transcription_file = temp_audio_path
else:
transcription_file = temp_file_path
# Transcribe the audio file
segments, _ = wmodel.transcribe(
transcription_file,
beam_size=beamsize,
vad_filter=True,
without_timestamps=True,
compression_ratio_threshold=2.4,
word_timestamps=False
)
full_text = " ".join(segment.text for segment in segments)
return jsonify({
'transcription': full_text,
'file_type': 'video' if file_extension in ALLOWED_VIDEO_EXTENSIONS else 'audio'
}), 200
except Exception as e:
return jsonify({'error': str(e)}), 500
finally:
cleanup_temp_files(temp_file_path, temp_audio_path)
active_requests -= 1
request_semaphore.release()
print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")

if __name__ == "__main__":
    # Create temporary folder if it doesn't exist
    if not os.path.exists(TEMPORARY_FOLDER):
        os.makedirs(TEMPORARY_FOLDER)
    app.run(host="0.0.0.0", port=7860, threaded=True)
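
# Example client call (a minimal sketch; assumes the server is reachable at
# http://localhost:7860 and that a local "sample.mp3" file exists):
#
#   import requests
#   with open("sample.mp3", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/whisper_transcribe",
#           files={"file": ("sample.mp3", f)},
#       )
#   print(resp.status_code, resp.json())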