File size: 3,977 Bytes
e7f2e98
 
 
4c399e4
e7f2e98
 
 
 
4c399e4
e0f07e3
 
e7f2e98
e0f07e3
 
e7f2e98
4c399e4
e0f07e3
 
 
 
e7f2e98
 
 
4c399e4
 
e7f2e98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c399e4
e7f2e98
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import whisper
import numpy as np
from datetime import datetime
import torch
from flask import Flask,request,jsonify
from transformers import pipeline
import os
from dotenv import load_dotenv
from flask_cors import CORS
# Point XDG_CACHE_HOME at a ".cache" directory under the current working
# directory; this happens before either model below is loaded.
# NOTE(review): `transformers` is already imported above this line; if its
# cache location is resolved at import time, this assignment may come too late
# to affect it -- confirm.
os.environ["XDG_CACHE_HOME"] = os.getcwd() + "/.cache"  # Set custom cache location
# Load environment variables via python-dotenv; HUGGING_FACE_AUTH_TOKEN and
# PORT are read from os.environ elsewhere in this file.
load_dotenv()

# Flask application object, wrapped with flask_cors using its default settings.
app=Flask(__name__)
CORS(app)
# Make sure the local cache directory exists, then load the Whisper "base"
# model with that directory as its download location.
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
model = whisper.load_model("base", download_root=cache_dir)  # Explicit cache location

# NOTE(review): SAMPLE_RATE and DURATION are not used by any code visible in
# this file; presumably leftovers from a recording feature -- confirm before
# removing.
SAMPLE_RATE=16000
DURATION=10
OUTPUT_FILE="recorded_audio.wav"

# NOTE(review): `device` is computed here but is not passed to the translation
# pipeline (or anything else visible in this file) -- confirm whether the
# pipeline was meant to receive it.
device="cuda" if torch.cuda.is_available() else "cpu"
# Hugging Face translation pipeline used by detect(); the auth token is taken
# from the HUGGING_FACE_AUTH_TOKEN environment variable (None when unset).
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en",token=os.environ.get("HUGGING_FACE_AUTH_TOKEN"))


def detect(audio_path):
    """Detect the language of an audio file, transcribe it, and translate it.

    Args:
        audio_path: Path of the audio file. It is handed to
            ``whisper.load_audio`` for language detection and to
            ``model.transcribe`` for transcription.

    Returns:
        A tuple ``(language, confidence, transcription, translation)``:
        ``language`` is the entry with the highest probability reported by
        ``model.detect_language``, ``confidence`` is that probability,
        ``transcription`` is the ``"text"`` field of the transcription result,
        and ``translation`` is the transcription itself when ``language`` is
        ``"en"``, otherwise the ``"translation_text"`` of the first result
        returned by the module-level ``translator`` pipeline.
    """
    # Language identification works on a padded/trimmed copy of the waveform,
    # converted to a log-mel spectrogram on the model's device.
    clip = whisper.pad_or_trim(whisper.load_audio(audio_path))
    spectrogram = whisper.log_mel_spectrogram(clip).to(model.device)
    _, lang_probs = model.detect_language(spectrogram)

    # Pick the most probable language together with its probability.
    language, confidence = max(lang_probs.items(), key=lambda item: item[1])

    # Transcription re-reads the file from disk, with the language pinned to
    # the one just detected.
    transcript = model.transcribe(audio_path, language=language)["text"]

    if language == "en":
        # English input needs no translation; return the transcript as-is.
        return language, confidence, transcript, transcript

    english = translator(transcript)[0]["translation_text"]
    return language, confidence, transcript, english

def _save_upload(audio_file):
    """Store an uploaded file under ``uploads/`` with a server-generated name.

    The client-supplied filename is untrusted: joining it into a path allows
    directory traversal (e.g. ``../../x``) and lets concurrent requests that
    use the same name overwrite or delete each other's file. Only a sanitized
    extension is kept from it; the base name is a random UUID.

    Args:
        audio_file: The uploaded file object taken from ``request.files``.

    Returns:
        The path of the saved file inside ``uploads/``.
    """
    import uuid  # local import keeps this block self-contained

    os.makedirs("uploads", exist_ok=True)
    _, raw_ext = os.path.splitext(os.path.basename(audio_file.filename or ""))
    # Keep only harmless characters from the extension and bound its length.
    safe_ext = "".join(ch for ch in raw_ext if ch.isalnum() or ch == ".")[:16]
    audio_path = os.path.join("uploads", uuid.uuid4().hex + safe_ext)
    audio_file.save(audio_path)
    return audio_path


def _handle_audio_request(build_payload):
    """Run ``detect`` on the uploaded ``audio`` file and build a JSON response.

    Args:
        build_payload: Callable receiving ``(detected_language, confidence,
            transcription, translation)`` and returning a dict of the
            endpoint-specific fields to add after ``"status": "success"``.

    Returns:
        A Flask JSON response: 400 when no ``audio`` file was posted, 500 with
        the exception text when processing fails, otherwise the success
        payload. The temporary upload is removed in every case.
    """
    audio_path = None
    try:
        if 'audio' not in request.files:
            return jsonify({"status":"error","message":"No audio file provided"}),400
        audio_path = _save_upload(request.files['audio'])
        detected_lang, conf, transcription, translation = detect(audio_path)
        payload = {"status": "success"}
        payload.update(build_payload(detected_lang, conf, transcription, translation))
        return jsonify(payload)
    except Exception as e:
        # Top-level request boundary: report any failure as a JSON 500.
        return jsonify({"status":"error","message":str(e)}),500
    finally:
        # Clean up even when detect() raised, so failed requests do not leave
        # uploads behind. A cleanup failure must not mask the real response.
        if audio_path is not None and os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError as cleanup_error:
                app.logger.warning(
                    "Could not remove upload %s: %s", audio_path, cleanup_error
                )


@app.route('/detect-language',methods=['POST'])
def detect_endpoint():
    """POST /detect-language: report the detected language and its confidence.

    Expects a multipart upload in the ``audio`` field. Responds with
    ``status``, ``detected_language`` and ``confidence`` (formatted as a
    percentage with two decimals).
    """
    return _handle_audio_request(
        lambda lang, conf, _text, _english: {
            "detected_language": lang,
            "confidence": f"{conf:.2%}",
        }
    )


@app.route('/transcription',methods=['POST'])
def transcription_endpoint():
    """POST /transcription: return the transcription of the uploaded audio.

    Expects a multipart upload in the ``audio`` field. Responds with
    ``status`` and ``transcription``.
    """
    return _handle_audio_request(
        lambda _lang, _conf, text, _english: {
            "transcription": text,
        }
    )


@app.route('/translation',methods=['POST'])
def translation_endpoint():
    """POST /translation: return the detected language and the translation.

    Expects a multipart upload in the ``audio`` field. Responds with
    ``status``, ``detected_language`` and ``translation``.
    """
    return _handle_audio_request(
        lambda lang, _conf, _text, english: {
            "detected_language": lang,
            "translation": english,
        }
    )

if __name__ == "__main__":
    # Serve on all interfaces with debug mode off; the port is read from the
    # PORT environment variable and falls back to 7860 when it is unset.
    app.run(
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
        debug=False,
    )