from flask import Flask, request, jsonify, Response
from faster_whisper import WhisperModel
import torch
import io
import time
import datetime
from threading import Semaphore
import os
from werkzeug.utils import secure_filename
import tempfile
from moviepy.editor import VideoFileClip # Added for video processing
app = Flask(__name__)

# Configuration
MAX_CONCURRENT_REQUESTS = 2  # Adjust based on server capacity
MAX_FILE_DURATION = 60 * 30  # 30 minutes maximum duration (adjust as needed)
TEMPORARY_FOLDER = tempfile.gettempdir()
ALLOWED_AUDIO_EXTENSIONS = {'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff'}
ALLOWED_VIDEO_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp'}
ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS.union(ALLOWED_VIDEO_EXTENSIONS)

# Device check for faster-whisper
device = "cuda" if torch.cuda.is_available() else "cpu"
compute_type = "float16" if device == "cuda" else "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# Faster Whisper setup with optimized parameters for long audio
beamsize = 2
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
    download_root="./model_cache"
)
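# Note: the "small" checkpoint trades accuracy for speed; a larger faster-whisper
# checkpoint can be substituted above if the hardware allows (assumption: other
# model sizes follow the same naming scheme on the Hugging Face Hub).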
# Concurrency control
request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)
active_requests = 0
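# Note: active_requests is only a best-effort counter surfaced by the status
# endpoints; the semaphore above is what actually limits concurrent transcriptions.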

def allowed_file(filename):
    """Return True if the filename has an extension in ALLOWED_EXTENSIONS."""
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

def cleanup_temp_files(*file_paths):
    """Ensure temporary files are deleted after processing"""
    for file_path in file_paths:
        try:
            if file_path and os.path.exists(file_path):
                os.remove(file_path)
        except Exception as e:
            print(f"Error cleaning up temp file {file_path}: {str(e)}")

def extract_audio_from_video(video_path, output_audio_path):
    """Extract audio from a video file and save it as a temporary audio file"""
    try:
        video = VideoFileClip(video_path)
        if video.duration > MAX_FILE_DURATION:
            video.close()
            raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
        video.audio.write_audiofile(output_audio_path)
        video.close()
        return output_audio_path
    except Exception as e:
        raise Exception(f"Failed to extract audio from video: {str(e)}")
@app.route("/health", methods=["GET"])
def health_check():
"""Endpoint to check if API is running"""
return jsonify({
'status': 'API is running',
'timestamp': datetime.datetime.now().isoformat(),
'device': device,
'compute_type': compute_type,
'active_requests': active_requests,
'max_duration_supported': MAX_FILE_DURATION,
'supported_formats': list(ALLOWED_EXTENSIONS)
})
@app.route("/status/busy", methods=["GET"])
def server_busy():
"""Endpoint to check if server is busy"""
is_busy = active_requests >= MAX_CONCURRENT_REQUESTS
return jsonify({
'is_busy': is_busy,
'active_requests': active_requests,
'max_capacity': MAX_CONCURRENT_REQUESTS
})
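
# Example status check (a minimal sketch; assumes the server is running locally
# on port 7860, as configured in app.run below):
#   curl http://localhost:7860/status/busy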
@app.route("/whisper_transcribe", methods=["POST"])
def transcribe():
global active_requests
if not request_semaphore.acquire(blocking=False):
return jsonify({'error': 'Server busy'}), 503
active_requests += 1
start_time = time.time()
temp_file_path = None
temp_audio_path = None
try:
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
file = request.files['file']
if not (file and allowed_file(file.filename)):
return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
# Save uploaded file to temporary location
temp_file_path = os.path.join(TEMPORARY_FOLDER, secure_filename(file.filename))
file.save(temp_file_path)
# Check if file is a video and extract audio if necessary
file_extension = file.filename.rsplit('.', 1)[1].lower()
if file_extension in ALLOWED_VIDEO_EXTENSIONS:
temp_audio_path = os.path.join(TEMPORARY_FOLDER, f"temp_audio_{int(time.time())}.wav")
extract_audio_from_video(temp_file_path, temp_audio_path)
transcription_file = temp_audio_path
else:
transcription_file = temp_file_path
# Transcribe the audio file
segments, _ = wmodel.transcribe(
transcription_file,
beam_size=beamsize,
vad_filter=True,
without_timestamps=True,
compression_ratio_threshold=2.4,
word_timestamps=False
)
full_text = " ".join(segment.text for segment in segments)
return jsonify({
'transcription': full_text,
'file_type': 'video' if file_extension in ALLOWED_VIDEO_EXTENSIONS else 'audio'
}), 200
except Exception as e:
return jsonify({'error': str(e)}), 500
finally:
cleanup_temp_files(temp_file_path, temp_audio_path)
active_requests -= 1
request_semaphore.release()
print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")

if __name__ == "__main__":
    # Create temporary folder if it doesn't exist
    if not os.path.exists(TEMPORARY_FOLDER):
        os.makedirs(TEMPORARY_FOLDER)
    app.run(host="0.0.0.0", port=7860, threaded=True)
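
# Example client call (a minimal sketch; assumes the server is reachable at
# http://localhost:7860 and that a local "sample.mp3" file exists):
#
#   import requests
#   with open("sample.mp3", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/whisper_transcribe",
#           files={"file": ("sample.mp3", f)},
#       )
#   print(resp.status_code, resp.json())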