Spaces:

satyamr196
/

ASR-FairBench-Server

Running

App Files Files Community

satyamr196 committed on May 26

Commit

dde7018

1 Parent(s): fff62f1

Revert "1) Modified code to accommodate Microsoft asr_models as well, which require their custom code to run, hence set trust_remote_code=True in pipe, 2) Added extra error handling, 3) Now, pipe is loaded in main thread instead of background thread."

Browse files

Files changed (1) hide show

ASR_Server.py +11 -48

ASR_Server.py CHANGED Viewed

@@ -43,7 +43,7 @@ dataset = load_dataset("satyamr196/asr_fairness_audio", split="train")
 # dataset = dataset.with_format("python", decode_audio=False)
 dataset = dataset.cast_column("audio", Audio(decode=False))
-def generateTranscript(ASR_model, pipe=None):
     import os
     import time
     import tqdm
@@ -94,10 +94,8 @@ def generateTranscript(ASR_model, pipe=None):
     total = len(df)
     job_status["total"] = total
-    if pipe is None:
-        pipe = pipeline("automatic-speech-recognition", model=ASR_model, trust_remote_code=True)
     # Initialize ASR pipeline
-    # pipe = pipeline("automatic-speech-recognition", model=ASR_model, trust_remote_code=True)
     # Column with filenames in the CSV
     filename_column = df.columns[0]
@@ -215,8 +213,6 @@ def get_status():
 @app.route('/api', methods=['GET'])
 def api():
-    from transformers import pipeline
     model = request.args.get('ASR_model', default="", type=str)
     # model = re.sub(r"\s+", "", model)
     model = re.sub(r"[^a-zA-Z0-9/_\-.]", "", model) # sanitize the model ID
@@ -250,50 +246,17 @@ def api():
                 'status': job_status
             })
-        try:
-            print(f"⏳ Loading model {model} in main thread...")
-            pipe = pipeline("automatic-speech-recognition", model=model, trust_remote_code=True)
-        except Exception as e:
-            return jsonify({
-                "error": f"Model load failed: {str(e)}",
-                "message": f"Model load failed: {str(e)}"
-            }), 500
-        def thread_wrapper(model, pipe):
-            try:
-                job_status["running"] = True
-                job_status["error"] = None
-                job_status["model"] = model
-                generateTranscript(model, pipe)
-                job_status["running"] = False
-                # return jsonify({
-                #     'message': f'Given Model {model} is being Evaluated, Please come back after a few hours and run the query again. Usually, it completes within an hour'
-                # }),200
-            except Exception as e:
-                print(f"❌ Background transcription for {model} failed:", e)
-                job_status["running"] = False
-                job_status["error"] = str(e)
-                # return jsonify({
-                #     "error": f"Background transcription failed: {str(e)}",
-                #     "message": f"Background transcription failed: {str(e)}"
-                # }), 500
-        # Then use:
-        thread = threading.Thread(target=thread_wrapper, args=(model, pipe), daemon=True)
-        thread.start()
         # thread = threading.Thread(target=generateTranscript, args=(model,), daemon=True)
-        # thread = threading.Thread(target=generateTranscript, args=(model,pipe))
-        # thread.start()
-        if job_status.get("error"):
-            return jsonify({
-                'message': f'❌transcription for model "{job_status.get("model")}" failed.',
-                'error': job_status["error"]
-            }), 500
-        return jsonify({
-            'message': f'Given Model {model} is being Evaluated. Please come back after a few hours and run the query again.',
-            'status': job_status
-        }), 202
 @app.route("/insert", methods=["POST"])
 def insert_document():

 # dataset = dataset.with_format("python", decode_audio=False)
 dataset = dataset.cast_column("audio", Audio(decode=False))
+def generateTranscript(ASR_model):
     import os
     import time
     import tqdm
     total = len(df)
     job_status["total"] = total
     # Initialize ASR pipeline
+    pipe = pipeline("automatic-speech-recognition", model=ASR_model)
     # Column with filenames in the CSV
     filename_column = df.columns[0]
 @app.route('/api', methods=['GET'])
 def api():
     model = request.args.get('ASR_model', default="", type=str)
     # model = re.sub(r"\s+", "", model)
     model = re.sub(r"[^a-zA-Z0-9/_\-.]", "", model) # sanitize the model ID
                 'status': job_status
             })
+        response = jsonify({
+            'message': f'Given Model {model} is being Evaluated, Please come back after a few hours and run the query again. Usually, it completes within an hour'
+        })
+        # Run `generateTranscript(model)` in a separate thread
+        # Start the transcript generation in a separate thread
         # thread = threading.Thread(target=generateTranscript, args=(model,), daemon=True)
+        thread = threading.Thread(target=generateTranscript, args=(model,))
+        thread.start()
+        return response
 @app.route("/insert", methods=["POST"])
 def insert_document():