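"""Flask service that wraps OpenAI Whisper for spoken-language detection,
transcription, and translation into English via a Helsinki-NLP MarianMT model.

Each of the three POST endpoints (/detect-language, /transcription,
/translation) expects a multipart form upload under the "audio" field.
"""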
import os

import torch
import whisper
from dotenv import load_dotenv
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import pipeline
from werkzeug.utils import secure_filename
load_dotenv()

# Cache model weights inside the working directory instead of the default home
# cache; XDG_CACHE_HOME must be set before the model is loaded.
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ["XDG_CACHE_HOME"] = cache_dir

app = Flask(__name__)
CORS(app)

# Leftover constants from an earlier local-recording fallback; OUTPUT_FILE is
# still checked before deleting processed uploads.
SAMPLE_RATE = 16000
DURATION = 10
OUTPUT_FILE = "recorded_audio.wav"

device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("base", device=device, download_root=cache_dir)

# Multilingual-to-English translation model.
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-mul-en",
    token=os.environ.get("HUGGING_FACE_AUTH_TOKEN"),
)
def detect(audio_path):
    """Detect the spoken language, transcribe the audio, and translate it to English."""
    # Language identification runs on a 30-second, log-mel representation of the clip.
    audio = whisper.load_audio(audio_path)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    _, probs = model.detect_language(mel)
    detected_lang = max(probs, key=probs.get)
    conf = probs[detected_lang]
    # Transcribe the full file in the detected language.
    res = model.transcribe(audio_path, language=detected_lang)
    trans = res["text"]
    # English audio needs no translation; everything else goes through MarianMT.
    translation = trans if detected_lang == "en" else translator(trans)[0]["translation_text"]
    return detected_lang, conf, trans, translation
@app.route('/detect-language', methods=['POST'])
def detect_endpoint():
    try:
        if 'audio' not in request.files:
            return jsonify({"status": "error", "message": "No audio file provided"}), 400
        else:
            audio_file = request.files['audio']
            audio_path = os.path.join("uploads", secure_filename(audio_file.filename))
            os.makedirs("uploads", exist_ok=True)
            audio_file.save(audio_path)
        det_lang, conf, _, _ = detect(audio_path)
        # Remove the uploaded file once processed.
        if os.path.exists(audio_path) and audio_path != OUTPUT_FILE:
            os.remove(audio_path)
        return jsonify({
            "status": "success",
            "detected_language": det_lang,
            "confidence": f"{conf:.2%}"
        })
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
@app.route('/transcription', methods=['POST'])
def transcription_endpoint():
    try:
        if 'audio' not in request.files:
            return jsonify({"status": "error", "message": "No audio file provided"}), 400
        else:
            audio_file = request.files['audio']
            audio_path = os.path.join("uploads", secure_filename(audio_file.filename))
            os.makedirs("uploads", exist_ok=True)
            audio_file.save(audio_path)
        _, _, trans, _ = detect(audio_path)
        # Remove the uploaded file once processed.
        if os.path.exists(audio_path) and audio_path != OUTPUT_FILE:
            os.remove(audio_path)
        return jsonify({
            "status": "success",
            "transcription": trans
        })
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
@app.route('/translation', methods=['POST'])
def translation_endpoint():
    try:
        if 'audio' not in request.files:
            return jsonify({"status": "error", "message": "No audio file provided"}), 400
        else:
            audio_file = request.files['audio']
            audio_path = os.path.join("uploads", secure_filename(audio_file.filename))
            os.makedirs("uploads", exist_ok=True)
            audio_file.save(audio_path)
        det_lang, _, _, trans = detect(audio_path)
        # Remove the uploaded file once processed.
        if os.path.exists(audio_path) and audio_path != OUTPUT_FILE:
            os.remove(audio_path)
        return jsonify({
            "status": "success",
            "detected_language": det_lang,
            "translation": trans
        })
    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500
if __name__ == "__main__":
    app.run(debug=False, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
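
# Example client call (illustrative sketch only, not executed by this module;
# assumes the server is reachable on localhost:7860 and that "sample.wav" is a
# short audio clip on disk):
#
#     import requests
#
#     with open("sample.wav", "rb") as f:
#         resp = requests.post(
#             "http://localhost:7860/detect-language",
#             files={"audio": f},
#         )
#     print(resp.json())  # {"status": "success", "detected_language": ..., "confidence": ...}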