Spaces:
Sleeping
Sleeping
Commit
·
0cb2c03
1
Parent(s):
f928012
Added code to track the transcription process running in the background
Browse files: ASR_Server.py +21 -0
ASR_Server.py
CHANGED
@@ -91,6 +91,15 @@ def generateTranscript(ASR_model, csv_path, output_dir="./"):
|
|
91 |
import soundfile as sf
|
92 |
from transformers import pipeline
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
# Load dataset without decoding audio (required!)
|
95 |
dataset = load_dataset("satyamr196/asr_fairness_audio", split="train")
|
96 |
# dataset = dataset.with_format("python", decode_audio=False)
|
@@ -103,6 +112,9 @@ def generateTranscript(ASR_model, csv_path, output_dir="./"):
|
|
103 |
return
|
104 |
|
105 |
# Load CSV
|
|
|
|
|
|
|
106 |
df = pd.read_csv(csv_path)
|
107 |
print(f"CSV Loaded with {len(df)} rows")
|
108 |
|
@@ -157,6 +169,9 @@ def generateTranscript(ASR_model, csv_path, output_dir="./"):
|
|
157 |
transcripts.append("")
|
158 |
rtfx_score.append(0)
|
159 |
|
|
|
|
|
|
|
160 |
# Save results
|
161 |
df["transcript"] = transcripts
|
162 |
df["rtfx"] = rtfx_score
|
@@ -169,6 +184,8 @@ def generateTranscript(ASR_model, csv_path, output_dir="./"):
|
|
169 |
print(f"Created directory: {output_dir}")
|
170 |
|
171 |
df.to_csv(output_csv_path, index=False)
|
|
|
|
|
172 |
|
173 |
print(f"\n📄 Transcripts saved to: {output_csv_path}")
|
174 |
|
@@ -207,5 +224,9 @@ def asr_models():
|
|
207 |
print("Transcription started in background")
|
208 |
return jsonify({"asr_models": models})
|
209 |
|
|
|
|
|
|
|
|
|
210 |
# if __name__ == "__main__":
|
211 |
# app.run(debug=True)
|
|
|
91 |
import soundfile as sf
|
92 |
from transformers import pipeline
|
93 |
|
94 |
+
global job_status
|
95 |
+
job_status.update({
|
96 |
+
"running": True,
|
97 |
+
"model": ASR_model,
|
98 |
+
"completed": 0,
|
99 |
+
"message": "Starting transcription..."
|
100 |
+
})
|
101 |
+
|
102 |
+
|
103 |
# Load dataset without decoding audio (required!)
|
104 |
dataset = load_dataset("satyamr196/asr_fairness_audio", split="train")
|
105 |
# dataset = dataset.with_format("python", decode_audio=False)
|
|
|
112 |
return
|
113 |
|
114 |
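# Load CSV  -- NOTE(review): the lines added just below call len(df) before df is assigned by pd.read_csv; unless df is assigned earlier in this function this raises UnboundLocalError, so they likely belong after the read_csv call -- confirm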
|
115 |
+
total = len(df)
|
116 |
+
job_status["total"] = total
|
117 |
+
|
118 |
df = pd.read_csv(csv_path)
|
119 |
print(f"CSV Loaded with {len(df)} rows")
|
120 |
|
|
|
169 |
transcripts.append("")
|
170 |
rtfx_score.append(0)
|
171 |
|
172 |
+
job_status["completed"] = idx + 1
|
173 |
+
job_status["message"] = f"Processing {idx + 1}/{total}"
|
174 |
+
|
175 |
# Save results
|
176 |
df["transcript"] = transcripts
|
177 |
df["rtfx"] = rtfx_score
|
|
|
184 |
print(f"Created directory: {output_dir}")
|
185 |
|
186 |
df.to_csv(output_csv_path, index=False)
|
187 |
+
job_status["running"] = False
|
188 |
+
job_status["message"] = "Transcription completed."
|
189 |
|
190 |
print(f"\n📄 Transcripts saved to: {output_csv_path}")
|
191 |
|
|
|
224 |
print("Transcription started in background")
|
225 |
return jsonify({"asr_models": models})
|
226 |
|
227 |
+
@app.route("/job_status")
|
228 |
+
def get_status():
|
229 |
+
return jsonify(job_status)
|
230 |
+
|
231 |
# if __name__ == "__main__":
|
232 |
# app.run(debug=True)
|