Spaces:
Sleeping
Sleeping
Commit
·
f21777c
1
Parent(s):
8e62829
Bug fix: ASR_model ("openai/whisper-tiny"), csv_transcript, and csv_result were hardcoded at module level; csv_transcript and csv_result are now computed inside generateTranscript from its ASR_model argument, making them query-string dependent
Browse files- ASR_Server.py +8 -4
ASR_Server.py
CHANGED
@@ -11,7 +11,7 @@ from utils.generate_box_plot import box_plot_data
|
|
11 |
|
12 |
# Set the cache directory for Hugging Face datasets
|
13 |
os.environ["HF_HOME"] = "/tmp/huggingface"
|
14 |
-
ASR_model = "openai/whisper-tiny" # Replace with your ASR model
|
15 |
#Check cpu score
|
16 |
import timeit
|
17 |
cpu_score = timeit.timeit("sum(range(1000000))", number=5)
|
@@ -27,8 +27,8 @@ job_status = {
|
|
27 |
}
|
28 |
|
29 |
csv_path = "test.csv"
|
30 |
-
csv_transcript = f'test_with_{ASR_model.replace("/", "_")}.csv'
|
31 |
-
csv_result = f'test_with_{ASR_model.replace("/","_")}_WER.csv'
|
32 |
df = pd.read_csv(csv_path)
|
33 |
print(f"CSV Loaded with {len(df)} rows")
|
34 |
|
@@ -54,12 +54,16 @@ def generateTranscript(ASR_model):
|
|
54 |
"total": None
|
55 |
})
|
56 |
|
|
|
|
|
57 |
# Check if transcript already exists
|
58 |
df_transcript = download_csv(csv_transcript)
|
59 |
if(df_transcript is None):
|
60 |
print(f"CSV not found in the dataset repo. Proceeding to generate transcript.")
|
61 |
else:
|
62 |
print(f"Transcript already exists for model {ASR_model}. Skipping transcription.")
|
|
|
|
|
63 |
return
|
64 |
|
65 |
# # Load test.csv
|
@@ -170,7 +174,7 @@ def asr_models():
|
|
170 |
]
|
171 |
|
172 |
def background_job():
|
173 |
-
generateTranscript(
|
174 |
|
175 |
# Start the background job in a separate thread
|
176 |
threading.Thread(target=background_job).start()
|
|
|
11 |
|
12 |
# Set the cache directory for Hugging Face datasets
|
13 |
os.environ["HF_HOME"] = "/tmp/huggingface"
|
14 |
+
# ASR_model = "openai/whisper-tiny" # Replace with your ASR model
|
15 |
#Check cpu score
|
16 |
import timeit
|
17 |
cpu_score = timeit.timeit("sum(range(1000000))", number=5)
|
|
|
27 |
}
|
28 |
|
29 |
csv_path = "test.csv"
|
30 |
+
# csv_transcript = f'test_with_{ASR_model.replace("/", "_")}.csv'
|
31 |
+
# csv_result = f'test_with_{ASR_model.replace("/","_")}_WER.csv'
|
32 |
df = pd.read_csv(csv_path)
|
33 |
print(f"CSV Loaded with {len(df)} rows")
|
34 |
|
|
|
54 |
"total": None
|
55 |
})
|
56 |
|
57 |
+
csv_transcript = f'test_with_{ASR_model.replace("/", "_")}.csv'
|
58 |
+
csv_result = f'test_with_{ASR_model.replace("/","_")}_WER.csv'
|
59 |
# Check if transcript already exists
|
60 |
df_transcript = download_csv(csv_transcript)
|
61 |
if(df_transcript is None):
|
62 |
print(f"CSV not found in the dataset repo. Proceeding to generate transcript.")
|
63 |
else:
|
64 |
print(f"Transcript already exists for model {ASR_model}. Skipping transcription.")
|
65 |
+
job_status["running"] = False
|
66 |
+
job_status["message"] = "Transcription Already existss"
|
67 |
return
|
68 |
|
69 |
# # Load test.csv
|
|
|
174 |
]
|
175 |
|
176 |
def background_job():
|
177 |
+
generateTranscript("openai/whisper-tiny")
|
178 |
|
179 |
# Start the background job in a separate thread
|
180 |
threading.Thread(target=background_job).start()
|