File size: 1,594 Bytes
56a884d
0117db0
56a884d
0117db0
56a884d
0117db0
 
 
56a884d
 
 
 
 
0117db0
 
56a884d
0117db0
 
 
56a884d
 
0117db0
 
 
56a884d
 
0117db0
56a884d
0117db0
 
 
56a884d
 
 
 
 
 
 
 
 
 
 
0117db0
 
e480215
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from flask import Flask, request, jsonify
from faster_whisper import WhisperModel
import torch
import io
import time

# Flask application object; the route handlers in this module register on it.
app = Flask(__name__)

# Choose the inference device and a matching compute type:
# float16 when torch reports a CUDA device, int8 when running on CPU.
if torch.cuda.is_available():
    device, compute_type = "cuda", "float16"
else:
    device, compute_type = "cpu", "int8"
print(f"Using device: {device} with compute_type: {compute_type}")

# Whisper model shared by every request, plus the beam size used for decoding.
beamsize = 2
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    device=device,
    compute_type=compute_type,
)

@app.route("/whisper_transcribe", methods=["POST"])
def whisper_transcribe():
    """Transcribe an uploaded audio file with the module-level Whisper model.

    Expects a POST request carrying the file under the ``audio`` form field.

    Returns:
        A JSON response, optionally paired with a status code:
        ``{'transcription': <text>}`` on success;
        ``{'error': 'No file provided'}`` with 400 when the ``audio`` field
        is missing;
        ``{'error': 'Invalid file format'}`` with 400 when the filename does
        not end in ``.mp3``, ``.wav``, ``.ogg`` or ``.m4a`` (case-insensitive);
        ``{'error': 'Transcription failed'}`` with 500 when transcription
        raises.
    """
    if 'audio' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    audio_file = request.files['audio']
    allowed_extensions = {'mp3', 'wav', 'ogg', 'm4a'}
    # rpartition instead of split('.')[-1]: a name without any dot (e.g. a
    # file literally called "wav") has no extension and must be rejected.
    _, dot, extension = (audio_file.filename or '').rpartition('.')
    if not dot or extension.lower() not in allowed_extensions:
        return jsonify({'error': 'Invalid file format'}), 400

    print(f"Transcribing audio on {device}")
    # Copy the upload into an in-memory, seekable buffer for the model.
    audio_buffer = io.BytesIO(audio_file.read())

    try:
        # Start the clock before transcribe() so that any work done inside
        # that call is part of the reported duration, not only the time
        # spent consuming the segments. perf_counter is monotonic, so the
        # measurement is unaffected by system clock adjustments.
        starttime = time.perf_counter()
        segments, _ = wmodel.transcribe(audio_buffer, beam_size=beamsize)
        # Consuming the segments iterable is what collects the text; join
        # avoids repeated string concatenation.
        text = ''.join(segment.text for segment in segments)
        print(f"Time to transcribe: {time.perf_counter() - starttime} seconds")
        return jsonify({'transcription': text})
    except Exception as e:
        # Request boundary: report the failure and answer with a generic 500
        # rather than leaking internal details to the client.
        print(f"Transcription error: {str(e)}")
        return jsonify({'error': 'Transcription failed'}), 500

if __name__ == "__main__":
    import os

    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Because the server
    # binds to all interfaces (0.0.0.0), debug must be opt-in: set
    # FLASK_DEBUG=1 (or true/yes/on) in the environment to enable it locally.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    app.run(host="0.0.0.0", debug=debug_enabled, port=7860, threaded=True)