Spaces:
Running
Running
Commit
·
547836e
1
Parent(s):
fb5d66c
Major update: created all the required routes, added utils folder containing helper functions
Browse files- ASR_Server.py +133 -40
- requirements.txt +2 -1
- test.csv +1 -1
- utils/__init__.py +0 -0
- utils/__pycache__/__init__.cpython-313.pyc +0 -0
- utils/__pycache__/generateResults.cpython-313.pyc +0 -0
- utils/__pycache__/generate_box_plot.cpython-313.pyc +0 -0
- utils/__pycache__/generate_results.cpython-313.pyc +0 -0
- utils/__pycache__/load_csv.cpython-313.pyc +0 -0
- utils/audio_duration.py +5 -0
- utils/generate_box_plot.py +69 -0
- utils/generate_results.py +256 -0
- utils/load_csv.py +60 -0
ASR_Server.py
CHANGED
@@ -1,12 +1,17 @@
|
|
1 |
-
from flask import Flask, jsonify
|
|
|
2 |
from datasets import load_dataset, Audio
|
3 |
import pandas as pd
|
4 |
import os
|
5 |
import threading
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
|
8 |
os.environ["HF_HOME"] = "/tmp/huggingface"
|
9 |
-
|
10 |
#Check cpu score
|
11 |
import timeit
|
12 |
cpu_score = timeit.timeit("sum(range(1000000))", number=5)
|
@@ -15,14 +20,22 @@ print(f"🧠 CPU benchmark score: {cpu_score:.2f}")
|
|
15 |
job_status = {
|
16 |
"running": False,
|
17 |
"model": None,
|
18 |
-
"completed":
|
19 |
-
"
|
|
|
|
|
20 |
}
|
|
|
21 |
csv_path = "test.csv"
|
22 |
-
|
|
|
23 |
df = pd.read_csv(csv_path)
|
24 |
print(f"CSV Loaded with {len(df)} rows")
|
25 |
|
|
|
|
|
|
|
|
|
26 |
|
27 |
def generateTranscript(ASR_model):
|
28 |
import os
|
@@ -36,36 +49,25 @@ def generateTranscript(ASR_model):
|
|
36 |
"running": True,
|
37 |
"model": ASR_model,
|
38 |
"completed": 0,
|
39 |
-
"
|
|
|
|
|
40 |
})
|
41 |
|
42 |
-
|
43 |
-
# Load dataset without decoding audio (required!)
|
44 |
-
dataset = load_dataset("satyamr196/asr_fairness_audio", split="train")
|
45 |
-
# dataset = dataset.with_format("python", decode_audio=False)
|
46 |
-
dataset = dataset.cast_column("audio", Audio(decode=False))
|
47 |
-
|
48 |
-
output_csv_path = os.path.join(output_dir, f"test_with_{ASR_model}.csv")
|
49 |
# Check if transcript already exists
|
50 |
-
|
|
|
|
|
|
|
51 |
print(f"Transcript already exists for model {ASR_model}. Skipping transcription.")
|
52 |
return
|
53 |
|
54 |
-
# Load
|
55 |
-
df = pd.read_csv(csv_path)
|
56 |
-
print(f"CSV Loaded with {len(df)} rows")
|
57 |
|
58 |
total = len(df)
|
59 |
job_status["total"] = total
|
60 |
-
|
61 |
-
# import torch
|
62 |
-
# # Check if GPU is available
|
63 |
-
# if torch.cuda.is_available():
|
64 |
-
# device = 0
|
65 |
-
# print("Device set to use GPU")
|
66 |
-
# else:
|
67 |
-
# device = -1
|
68 |
-
# print("Device set to use CPU")
|
69 |
|
70 |
# Initialize ASR pipeline
|
71 |
pipe = pipeline("automatic-speech-recognition", model=ASR_model)
|
@@ -79,7 +81,7 @@ def generateTranscript(ASR_model):
|
|
79 |
# dataset = dataset.with_format("python", decode_audio=False)
|
80 |
dataset_map = {
|
81 |
os.path.basename(sample["audio"]["path"]).lower(): sample
|
82 |
-
for sample in dataset
|
83 |
}
|
84 |
|
85 |
transcripts = []
|
@@ -96,7 +98,9 @@ def generateTranscript(ASR_model):
|
|
96 |
audio_array, sample_rate = sf.read(file_path)
|
97 |
|
98 |
start_time = time.time()
|
99 |
-
result = pipe({"array": audio_array, "sampling_rate": sample_rate})
|
|
|
|
|
100 |
end_time = time.time()
|
101 |
|
102 |
transcript = result["text"]
|
@@ -106,7 +110,7 @@ def generateTranscript(ASR_model):
|
|
106 |
transcripts.append(transcript)
|
107 |
rtfx_score.append(rtfx)
|
108 |
|
109 |
-
print(f"✅ {filename}: RTFX = {rtfx:.2f}")
|
110 |
|
111 |
except Exception as e:
|
112 |
print(f"❌ Error with {filename}: {e}")
|
@@ -125,22 +129,21 @@ def generateTranscript(ASR_model):
|
|
125 |
df["transcript"] = transcripts
|
126 |
df["rtfx"] = rtfx_score
|
127 |
|
128 |
-
os.makedirs(output_dir, exist_ok=True)
|
129 |
-
# Create the directory if it doesn't exist
|
130 |
-
csv_output_dir = os.path.dirname(output_csv_path) # Get the directory path
|
131 |
-
if not os.path.exists(csv_output_dir): # Check if directory exists
|
132 |
-
os.makedirs(csv_output_dir) # Create directory if it doesn't exist
|
133 |
-
print(f"Created directory: {csv_output_dir}")
|
134 |
-
|
135 |
-
df.to_csv(output_csv_path, index=False)
|
136 |
job_status["running"] = False
|
137 |
job_status["message"] = "Transcription completed."
|
|
|
|
|
|
|
138 |
|
139 |
-
print(f"\n📄 Transcripts saved to: {output_csv_path}")
|
140 |
|
|
|
|
|
|
|
141 |
|
|
|
142 |
|
143 |
app = Flask(__name__)
|
|
|
144 |
|
145 |
@app.route("/")
|
146 |
def home():
|
@@ -165,8 +168,9 @@ def asr_models():
|
|
165 |
"Fairseq S2T",
|
166 |
"ESPnet"
|
167 |
]
|
|
|
168 |
def background_job():
|
169 |
-
generateTranscript(
|
170 |
|
171 |
# Start the background job in a separate thread
|
172 |
threading.Thread(target=background_job).start()
|
@@ -177,5 +181,94 @@ def asr_models():
|
|
177 |
def get_status():
|
178 |
return jsonify(job_status)
|
179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
# if __name__ == "__main__":
|
181 |
# app.run(debug=True)
|
|
|
1 |
+
from flask import Flask, jsonify, request
|
2 |
+
from flask_cors import CORS
|
3 |
from datasets import load_dataset, Audio
|
4 |
import pandas as pd
|
5 |
import os
|
6 |
import threading
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
from utils.load_csv import upload_csv, download_csv
|
9 |
+
from utils.generate_results import generateResults
|
10 |
+
from utils.generate_box_plot import box_plot_data
|
11 |
|
12 |
+
# Set the cache directory for Hugging Face datasets
|
13 |
os.environ["HF_HOME"] = "/tmp/huggingface"
|
14 |
+
ASR_model = "openai/whisper-tiny" # Replace with your ASR model
|
15 |
#Check cpu score
|
16 |
import timeit
|
17 |
cpu_score = timeit.timeit("sum(range(1000000))", number=5)
|
|
|
20 |
job_status = {
|
21 |
"running": False,
|
22 |
"model": None,
|
23 |
+
"completed": None,
|
24 |
+
"%_completed" : None,
|
25 |
+
"message": "No Transcription in progress",
|
26 |
+
"total": None
|
27 |
}
|
28 |
+
|
29 |
csv_path = "test.csv"
|
30 |
+
csv_transcript = f"test_with_{ASR_model.replace("/","_")}.csv"
|
31 |
+
csv_result = f"test_with_{ASR_model.replace("/","_")}_WER.csv"
|
32 |
df = pd.read_csv(csv_path)
|
33 |
print(f"CSV Loaded with {len(df)} rows")
|
34 |
|
35 |
+
# # Load dataset without decoding audio (required!)
|
36 |
+
# dataset = load_dataset("satyamr196/asr_fairness_audio", split="train")
|
37 |
+
# # dataset = dataset.with_format("python", decode_audio=False)
|
38 |
+
# dataset = dataset.cast_column("audio", Audio(decode=False))
|
39 |
|
40 |
def generateTranscript(ASR_model):
|
41 |
import os
|
|
|
49 |
"running": True,
|
50 |
"model": ASR_model,
|
51 |
"completed": 0,
|
52 |
+
"%_completed" : 0,
|
53 |
+
"message": "Starting transcription...",
|
54 |
+
"total": None
|
55 |
})
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
# Check if transcript already exists
|
58 |
+
df_transcript = download_csv(csv_transcript)
|
59 |
+
if(df_transcript is None):
|
60 |
+
print(f"CSV not found in the dataset repo. Proceeding to generate transcript.")
|
61 |
+
else:
|
62 |
print(f"Transcript already exists for model {ASR_model}. Skipping transcription.")
|
63 |
return
|
64 |
|
65 |
+
# # Load test.csv
|
66 |
+
# df = pd.read_csv(csv_path)
|
67 |
+
# print(f"CSV Loaded with {len(df)} rows")
|
68 |
|
69 |
total = len(df)
|
70 |
job_status["total"] = total
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
# Initialize ASR pipeline
|
73 |
pipe = pipeline("automatic-speech-recognition", model=ASR_model)
|
|
|
81 |
# dataset = dataset.with_format("python", decode_audio=False)
|
82 |
dataset_map = {
|
83 |
os.path.basename(sample["audio"]["path"]).lower(): sample
|
84 |
+
# for sample in dataset #uncomment this line to use the dataset
|
85 |
}
|
86 |
|
87 |
transcripts = []
|
|
|
98 |
audio_array, sample_rate = sf.read(file_path)
|
99 |
|
100 |
start_time = time.time()
|
101 |
+
# result = pipe({"array": audio_array, "sampling_rate": sample_rate})
|
102 |
+
result = pipe({"array": audio_array, "sampling_rate": sample_rate},return_timestamps=True)
|
103 |
+
|
104 |
end_time = time.time()
|
105 |
|
106 |
transcript = result["text"]
|
|
|
110 |
transcripts.append(transcript)
|
111 |
rtfx_score.append(rtfx)
|
112 |
|
113 |
+
print(f"✅ {filename}: RTFX = {rtfx:.2f}, Progress: {(idx + 1) * 100 / total} %")
|
114 |
|
115 |
except Exception as e:
|
116 |
print(f"❌ Error with {filename}: {e}")
|
|
|
129 |
df["transcript"] = transcripts
|
130 |
df["rtfx"] = rtfx_score
|
131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
job_status["running"] = False
|
133 |
job_status["message"] = "Transcription completed."
|
134 |
+
# df.to_csv(csv_result, index=False)
|
135 |
+
upload_csv(df, csv_transcript)
|
136 |
+
print(f"\n📄 Transcripts saved to: {csv_transcript}")
|
137 |
|
|
|
138 |
|
139 |
+
# generateTranscript(ASR_model)
|
140 |
+
# print(generate_results(ASR_model))
|
141 |
+
# print(box_plot_data(ASR_model))
|
142 |
|
143 |
+
# ! FLASK SERVER CODE :-
|
144 |
|
145 |
app = Flask(__name__)
|
146 |
+
CORS(app,origins="*")
|
147 |
|
148 |
@app.route("/")
|
149 |
def home():
|
|
|
168 |
"Fairseq S2T",
|
169 |
"ESPnet"
|
170 |
]
|
171 |
+
|
172 |
def background_job():
|
173 |
+
generateTranscript(ASR_model)
|
174 |
|
175 |
# Start the background job in a separate thread
|
176 |
threading.Thread(target=background_job).start()
|
|
|
181 |
def get_status():
|
182 |
return jsonify(job_status)
|
183 |
|
184 |
+
@app.route('/api', methods=['GET'])
def api():
    """Return evaluation results for an ASR model, or start its transcription.

    Query parameters:
        ASR_model: identifier of the model to evaluate (required).

    Behaviour:
        * Missing ``ASR_model``            -> 400 with an ``error`` field.
        * Transcript CSV already available -> results from ``generateResults``
          merged with the per-group WER lists from ``box_plot_data``.
        * A transcription job is running   -> a "please wait" message plus
          the current ``job_status``.
        * Otherwise                        -> ``generateTranscript(model)`` is
          started in a background thread and an acknowledgement is returned.
    """
    model = request.args.get('ASR_model', default="", type=str)
    # Validate first so no file name is derived from an empty model id.
    if not model:
        return jsonify({'error': 'ASR_model parameter is required'}), 400  # Return 400 if model is missing

    # Built outside the f-string: reusing the same quote character inside an
    # f-string replacement field only parses on Python 3.12+.
    safe_name = model.replace("/", "_")
    csv_transcript = f"test_with_{safe_name}.csv"

    if download_csv(csv_transcript) is not None:
        # The transcript exists, so the fairness results can be computed.
        Results = generateResults(model)
        if Results is None:
            # generateResults returns None when it cannot load the transcript;
            # unpacking None with ** below would raise TypeError.
            return jsonify({'error': f'Results could not be generated for {model}'}), 500
        wer_Gender, wer_SEG, wer_Ethnicity, wer_Language = box_plot_data(model)

        return jsonify({
            'message': f'{model} has been evaluated and results are shown below',
            'endpoint': "/api",
            'model': model,
            'greet' : "Welcome to ASR-FairBench",
            **Results,
            'wer_Gender' : wer_Gender,
            'wer_SEG' : wer_SEG,
            'wer_Ethnicity' : wer_Ethnicity,
            'wer_Language' : wer_Language
        })

    # No transcript yet: refuse to start a second job while one is running.
    if job_status["running"]:
        return jsonify({
            'message': f'Transcription for {job_status["model"]} is in progress. Please wait for it to complete. Then submit your model again.',
            'status': job_status
        })

    response = jsonify({
        'message': f'Given Model {model} is being Evaluated, Please come back after a few hours and run the query again. Usually, it completes within an hour'
    })

    # Run the transcription in a separate thread so this request returns
    # immediately instead of blocking until the job finishes.
    thread = threading.Thread(target=generateTranscript, args=(model,))
    thread.start()

    return response
|
226 |
+
|
227 |
+
@app.route("/insert", methods=["POST"])
def insert_document():
    """Append one model entry to ``leaderboard.csv`` unless it already exists.

    Expects a JSON object in the request body containing at least a
    ``"Model"`` field. The response always carries an ``"exists"`` field:
    ``True`` when the model is already listed, ``False`` after a successful
    insert, and ``"Error"`` (with an ``"error"`` message) on failure.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so the payload can be validated explicitly.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not data.get("Model"):
            # Without this check a row with no model name would be appended.
            return jsonify({
                "exists": "Error",
                "error": "Request body must be a JSON object with a non-empty 'Model' field"
            }), 400

        model_name = data["Model"]
        csv_filename = "leaderboard.csv"

        # Try to download the leaderboard CSV from the HF dataset.
        df = download_csv(csv_filename)
        new_row = pd.DataFrame([data])

        if df is None:
            # No leaderboard yet: start one from this single entry.
            df = new_row
        else:
            # Do not insert the same model twice.
            if model_name in df["Model"].values:
                return jsonify({"exists": True})
            df = pd.concat([df, new_row], ignore_index=True)

        # Upload the updated CSV back to the Hugging Face dataset.
        success = upload_csv(df, csv_filename)
        if not success:
            return jsonify({"exists": "Error", "error": "Upload to Hugging Face failed"})

        return jsonify({"exists": False, "message": "Data inserted into leaderboard successfully!"})

    except Exception as e:
        # Route boundary: report the failure in the response body instead of
        # letting the exception propagate.
        return jsonify({"exists": "Error", "error": str(e)})
|
256 |
+
|
257 |
+
# Fetch all documents
|
258 |
+
@app.route("/fetch", methods=["GET"])
def fetch_documents():
    """Return every leaderboard row as ``{"data": [row_dict, ...]}``.

    Responds with ``{"error": ...}`` when ``leaderboard.csv`` cannot be
    downloaded or when any step raises.
    """
    try:
        csv_filename = "leaderboard.csv"
        df = download_csv(csv_filename)

        if df is None:
            return jsonify({"error": "Leaderboard CSV not found in Hugging Face dataset."})

        # Empty CSV cells load as NaN, which would be serialized as the bare
        # token NaN (not valid JSON). Convert them to None so they become null.
        cleaned = df.astype(object).where(df.notna(), None)
        documents = cleaned.to_dict(orient="records")  # list of one dict per row
        return jsonify({"data": documents})

    except Exception as e:
        # Route boundary: surface the failure in the response body.
        return jsonify({"error": str(e)})
|
272 |
+
|
273 |
# if __name__ == "__main__":
|
274 |
# app.run(debug=True)
|
requirements.txt
CHANGED
@@ -14,4 +14,5 @@ flask
|
|
14 |
pymongo
|
15 |
flask-cors
|
16 |
pandas
|
17 |
-
tqdm
|
|
|
|
14 |
pymongo
|
15 |
flask-cors
|
16 |
pandas
|
17 |
+
tqdm
|
18 |
+
python-dotenv
|
test.csv
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
96ce0c2debfa8656fe16d30187d683df,hey facebook set my status uh to available,31 - 45,male,English,Low,"Native American, American Indian, or Alaska Native","male_English_Low_Native American, American Indian, or Alaska Native"
|
3 |
f3d12b16dd637efb9ce8142632d63f34,send text message to wolfgang hey uncle wolfgang i really miss you hope all is well let's talk soon,46 - 65,male,German,Affluent,White,male_German_Affluent_White
|
4 |
9859c9ab6ca0377d593ad245f39bc224,text john i am sorry about your loss,23 - 30,female,English,Low,Black or African American,female_English_Low_Black or African American
|
|
|
1 |
+
hash_name,transcription,age,gender,first_language,socioeconomic_bkgd,ethnicity,combined_column
|
2 |
96ce0c2debfa8656fe16d30187d683df,hey facebook set my status uh to available,31 - 45,male,English,Low,"Native American, American Indian, or Alaska Native","male_English_Low_Native American, American Indian, or Alaska Native"
|
3 |
f3d12b16dd637efb9ce8142632d63f34,send text message to wolfgang hey uncle wolfgang i really miss you hope all is well let's talk soon,46 - 65,male,German,Affluent,White,male_German_Affluent_White
|
4 |
9859c9ab6ca0377d593ad245f39bc224,text john i am sorry about your loss,23 - 30,female,English,Low,Black or African American,female_English_Low_Black or African American
|
utils/__init__.py
ADDED
File without changes
|
utils/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (145 Bytes). View file
|
|
utils/__pycache__/generateResults.cpython-313.pyc
ADDED
Binary file (10.1 kB). View file
|
|
utils/__pycache__/generate_box_plot.cpython-313.pyc
ADDED
Binary file (5.48 kB). View file
|
|
utils/__pycache__/generate_results.cpython-313.pyc
ADDED
Binary file (10.1 kB). View file
|
|
utils/__pycache__/load_csv.cpython-313.pyc
ADDED
Binary file (1.61 kB). View file
|
|
utils/audio_duration.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydub import AudioSegment
|
2 |
+
#The audio duration calculation
|
3 |
+
def get_audio_duration(audio_file):
    """Return the duration of ``audio_file`` in seconds.

    The file is loaded with ``AudioSegment.from_file`` and the length of the
    loaded segment (treated as milliseconds) is divided by 1000.
    """
    duration_ms = len(AudioSegment.from_file(audio_file))
    return duration_ms / 1000  # milliseconds -> seconds
|
utils/generate_box_plot.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils.load_csv import download_csv
|
2 |
+
|
3 |
+
# Display label -> value stored in the CSV column, per demographic attribute.
_GENDER_LEVELS = {"Male": "male", "Female": "female"}

_SEG_LEVELS = {name: name for name in ("Low", "Affluent", "Medium")}

_ETHNICITY_LEVELS = {
    name: name
    for name in (
        "Asian, South Asian or Asian American",
        "Black or African American",
        "Hispanic, Latino, or Spanish",
        "Middle Eastern or North African",
        "Native American, American Indian, or Alaska Native",
        "Native Hawaiian or Other Pacific Islander",
        "White",
    )
}

# Each language appears once; the previous dict literal listed "Hindi" and
# "Russian" twice, which only recomputed the same entry.
_LANGUAGE_LEVELS = {
    name: name
    for name in (
        "English", "German", "French", "Arabic", "Cantonese", "Creole",
        "Dutch", "English/Turkish", "Filipino", "Hindi", "Hmong",
        "Indonesian", "Italian", "Japanese", "Korean", "Laotian", "Malay",
        "Malaysian", "Mandarin", "Marathi", "Nepali", "Other", "Portuguese",
        "Russian", "Spanish", "Tagalog", "Turkish", "Ukrainian", "Urdu",
        "Vietnamese",
    )
}


def _wer_by_level(df, column, levels):
    """Map each label to the list of ``WER`` values of rows where ``column`` equals its level."""
    return {
        label: df.loc[df[column] == value, "WER"].tolist()
        for label, value in levels.items()
    }


def box_plot_data(ASR_model):
    """Collect per-group WER lists for the box plots of one ASR model.

    Args:
        ASR_model: model identifier; ``/`` is replaced by ``_`` to build the
            name of the results CSV (``test_with_<model>_WER.csv``).

    Returns:
        A 4-tuple ``(wer_Gender, wer_SEG, wer_Ethnicity, wer_Language)`` of
        dicts mapping a group label to the list of ``WER`` values of the rows
        belonging to that group.

    Raises:
        FileNotFoundError: if ``download_csv`` returns ``None`` for the
            results CSV.
    """
    # Built outside the f-string: reusing the same quote character inside an
    # f-string replacement field only parses on Python 3.12+.
    safe_name = ASR_model.replace("/", "_")
    csv_result = f"test_with_{safe_name}_WER.csv"
    df = download_csv(csv_result)

    if df is None:
        # Fail with a clear message instead of AttributeError on None below.
        raise FileNotFoundError(
            f"{csv_result} could not be downloaded; generate the results first."
        )

    # Display actual column names to check for issues
    print(df.columns)

    # Trim column names of any leading or trailing spaces
    df.columns = df.columns.str.strip()

    wer_Gender = _wer_by_level(df, "gender", _GENDER_LEVELS)
    wer_SEG = _wer_by_level(df, "socioeconomic_bkgd", _SEG_LEVELS)
    wer_Ethnicity = _wer_by_level(df, "ethnicity", _ETHNICITY_LEVELS)
    wer_Language = _wer_by_level(df, "first_language", _LANGUAGE_LEVELS)

    return wer_Gender, wer_SEG, wer_Ethnicity, wer_Language
|
utils/generate_results.py
ADDED
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import re
|
3 |
+
import numpy as np
|
4 |
+
from jiwer import wer
|
5 |
+
import statsmodels.api as sm
|
6 |
+
import statsmodels.formula.api as smf
|
7 |
+
from utils.load_csv import download_csv, upload_csv
|
8 |
+
|
9 |
+
def generateResults(ASR_model):
|
10 |
+
|
11 |
+
# Define normalization function
|
12 |
+
def normalize_text(text):
|
13 |
+
"""
|
14 |
+
Normalize text by converting to lowercase, removing special characters,
|
15 |
+
except digits, and handling None or float values.
|
16 |
+
"""
|
17 |
+
if text is None or pd.isna(text): # Check for None or NaN
|
18 |
+
return ""
|
19 |
+
if isinstance(text, float): # Check for floats and convert them to empty string
|
20 |
+
return ""
|
21 |
+
text = text.lower() # Convert to lowercase
|
22 |
+
text = re.sub(r'[^a-z0-9\s]', '', text) # Keep only letters, digits, and spaces
|
23 |
+
return text.strip() # Remove leading/trailing spaces
|
24 |
+
|
25 |
+
|
26 |
+
# Load the CSV with whisper transcripts f"test_with_{ASR_model}.csv"
|
27 |
+
csv_transcript = f"test_with_{ASR_model.replace("/","_")}.csv"
|
28 |
+
# Read the CSV file
|
29 |
+
df = download_csv(csv_transcript)
|
30 |
+
|
31 |
+
if(df is None):
|
32 |
+
print(f"CSV not found in the dataset repo. Please generate the transcript file first.")
|
33 |
+
return
|
34 |
+
|
35 |
+
# Normalize original text and whisper transcripts
|
36 |
+
df['normalized_transcription'] = df[df.columns[1]].apply(normalize_text) # Replace 'original_text' with your column name
|
37 |
+
|
38 |
+
# Check if whisper transcript column exists
|
39 |
+
if 'transcript' in df.columns:
|
40 |
+
df['normalized_transcript'] = df[df.columns[8]].apply(normalize_text)
|
41 |
+
|
42 |
+
# Calculate WER
|
43 |
+
wer_scores = []
|
44 |
+
for index, row in df.iterrows():
|
45 |
+
original = row['normalized_transcription']
|
46 |
+
transcript = row['normalized_transcript']
|
47 |
+
if original and transcript:
|
48 |
+
wer_score = wer(original, transcript)
|
49 |
+
else:
|
50 |
+
wer_score = 1.0 # Maximum error if one text is missing
|
51 |
+
wer_scores.append(wer_score)
|
52 |
+
|
53 |
+
df['WER'] = wer_scores
|
54 |
+
# Compute IQR
|
55 |
+
Q1 = df['WER'].quantile(0.25)
|
56 |
+
Q3 = df['WER'].quantile(0.75)
|
57 |
+
IQR = Q3 - Q1
|
58 |
+
# Define outlier range
|
59 |
+
lower_bound = Q1 - 1.5 * IQR
|
60 |
+
upper_bound = Q3 + 1.5 * IQR
|
61 |
+
# Remove outliers
|
62 |
+
df = df[(df['WER'] >= lower_bound) & (df['WER'] <= upper_bound)]
|
63 |
+
else:
|
64 |
+
print("Column 'transcript' not found in CSV")
|
65 |
+
|
66 |
+
# Save the updated CSV
|
67 |
+
csv_result = f"test_with_{ASR_model.replace("/","_")}_WER.csv"
|
68 |
+
upload_csv(df,csv_result)
|
69 |
+
|
70 |
+
print(f"WER calculations saved to {csv_result}")
|
71 |
+
avg_wer = df["WER"].mean()
|
72 |
+
avg_rtfx = df["rtfx"].mean()
|
73 |
+
print(f"Average WER: {avg_wer} and Avg RTFX : {avg_rtfx}")
|
74 |
+
#----------------------------------------------------------------------------------------------------------
|
75 |
+
|
76 |
+
#----------------------------------------------------------------------------------------------------------
|
77 |
+
# Define protected attributes and label columns
|
78 |
+
protected_attributes = ['gender', 'first_language', 'socioeconomic_bkgd', 'ethnicity']
|
79 |
+
label_column = 'normalized_transcription'
|
80 |
+
prediction_column = 'normalized_transcript'
|
81 |
+
wer_column = 'WER'
|
82 |
+
|
83 |
+
data = df
|
84 |
+
|
85 |
+
# Function to calculate WER disparity
|
86 |
+
def calculate_wer_disparity(data, protected_attribute, wer_column):
|
87 |
+
groups = data[protected_attribute].unique()
|
88 |
+
wer_disparity = {}
|
89 |
+
for group in groups:
|
90 |
+
group_data = data[data[protected_attribute] == group]
|
91 |
+
avg_wer = group_data[wer_column].mean()
|
92 |
+
wer_disparity[group] = avg_wer
|
93 |
+
return wer_disparity
|
94 |
+
|
95 |
+
# Calculate WER disparity for each protected attribute
|
96 |
+
for attribute in protected_attributes:
|
97 |
+
disparity = calculate_wer_disparity(data, attribute, wer_column)
|
98 |
+
print(f"WER Disparity for {attribute}:", disparity)
|
99 |
+
#-------------------------------------------------------------------------------------------------------
|
100 |
+
|
101 |
+
#-------------------------------------------------------------------------------------------------------
|
102 |
+
data["Reference_words"] = data["normalized_transcription"].str.split().str.len()
|
103 |
+
|
104 |
+
# Compute word error count (WER_count)
|
105 |
+
data["WER_count"] = data["Reference_words"] * data["WER"]
|
106 |
+
|
107 |
+
df = data
|
108 |
+
|
109 |
+
categorical_cols = ['gender', 'first_language', 'socioeconomic_bkgd', 'ethnicity']
|
110 |
+
for col in categorical_cols:
|
111 |
+
df[col] = df[col].astype("category")
|
112 |
+
|
113 |
+
# Offset: log of reference word count (to adjust for different transcript lengths)
|
114 |
+
df["log_Ref_Words"] = np.log(df["Reference_words"] + 1) # Adding 1 to avoid log(0)
|
115 |
+
|
116 |
+
# Fit a Mixed-Effects Poisson Regression Model
|
117 |
+
mixed_model = smf.mixedlm(
|
118 |
+
formula="WER_count ~ log_Ref_Words + age + gender + first_language + socioeconomic_bkgd + ethnicity", # Fixed effects
|
119 |
+
data=df,
|
120 |
+
groups=df["combined_column"] # Random effect on speaker
|
121 |
+
|
122 |
+
).fit()
|
123 |
+
|
124 |
+
# Display results
|
125 |
+
# print(mixed_model.summary())
|
126 |
+
|
127 |
+
#--------------------------------------------------------------------------------------------------------------------------
|
128 |
+
|
129 |
+
#--------------------------------------------------------------------------------------------------------------------------
|
130 |
+
from scipy.stats import chi2
|
131 |
+
|
132 |
+
# Assume 'mixed_model' is your already-fitted mixed-effects model and 'df' is your DataFrame.
|
133 |
+
# Also assume df["log_Ref_Words"] = np.log(df["Reference_words"] + 1)
|
134 |
+
params = mixed_model.params
|
135 |
+
|
136 |
+
# Set fixed values for continuous predictors:
|
137 |
+
fixed_log_ref = df["log_Ref_Words"].mean()
|
138 |
+
baseline_log = params["Intercept"] + params["log_Ref_Words"] * fixed_log_ref
|
139 |
+
exposure = np.exp(fixed_log_ref) - 1
|
140 |
+
|
141 |
+
def compute_predicted_error_rate(category, level, params, baseline_log, exposure):
|
142 |
+
"""Computes the predicted WER (error rate) for a given level of a demographic attribute."""
|
143 |
+
coef_name = f"{category}[T.{level}]"
|
144 |
+
effect = params.get(coef_name, 0) # For the baseline level, effect is 0.
|
145 |
+
pred_log = baseline_log + effect
|
146 |
+
pred_count = np.exp(pred_log)
|
147 |
+
return pred_count / exposure
|
148 |
+
|
149 |
+
def compute_category_fairness(category, params, baseline_log, exposure, df):
|
150 |
+
"""
|
151 |
+
For a given category, compute:
|
152 |
+
- Predicted error rates for each subgroup level.
|
153 |
+
- Raw fairness scores (0-100 scale: 100 = best, 0 = worst) based on linear scaling.
|
154 |
+
- A weighted category fairness score using group proportions.
|
155 |
+
"""
|
156 |
+
levels = df[category].cat.categories
|
157 |
+
predictions = {}
|
158 |
+
for lvl in levels:
|
159 |
+
predictions[lvl] = compute_predicted_error_rate(category, lvl, params, baseline_log, exposure)
|
160 |
+
|
161 |
+
# Convert predictions to a Series.
|
162 |
+
pred_series = pd.Series(predictions)
|
163 |
+
min_pred, max_pred = pred_series.min(), pred_series.max()
|
164 |
+
|
165 |
+
# Compute raw fairness scores: if all levels are identical, assign 100 to everyone.
|
166 |
+
if max_pred == min_pred:
|
167 |
+
raw_fairness = pred_series.apply(lambda x: 100.0)
|
168 |
+
else:
|
169 |
+
raw_fairness = pred_series.apply(lambda x: 100 * (1 - (x - min_pred) / (max_pred - min_pred)))
|
170 |
+
|
171 |
+
# Weight the subgroup fairness scores by their sample proportions in the dataset.
|
172 |
+
group_proportions = df[category].value_counts(normalize=True)
|
173 |
+
# Ensure ordering matches the fairness scores index:
|
174 |
+
group_proportions = group_proportions.reindex(raw_fairness.index, fill_value=0)
|
175 |
+
weighted_category_fairness = np.average(raw_fairness, weights=group_proportions)
|
176 |
+
|
177 |
+
return pred_series, raw_fairness, weighted_category_fairness
|
178 |
+
|
179 |
+
def perform_lrt(attribute, df):
|
180 |
+
"""Performs Likelihood Ratio Test (LRT) to test the overall significance of an attribute."""
|
181 |
+
full_model = smf.mixedlm(f"WER ~ {attribute} + log_Ref_Words", df, groups=df["combined_column"]).fit()
|
182 |
+
reduced_model = smf.mixedlm("WER ~ log_Ref_Words", df, groups=df["combined_column"]).fit()
|
183 |
+
lr_stat = 2 * (full_model.llf - reduced_model.llf)
|
184 |
+
df_diff = full_model.df_modelwc - reduced_model.df_modelwc
|
185 |
+
p_value = chi2.sf(lr_stat, df_diff)
|
186 |
+
return p_value
|
187 |
+
|
188 |
+
# List of attributes to evaluate
|
189 |
+
categories = ['gender', 'first_language', 'socioeconomic_bkgd', 'ethnicity']
|
190 |
+
results = {}
|
191 |
+
adjusted_category_scores = [] # To store adjusted fairness scores for each category.
|
192 |
+
weights_for_categories = [] # Weight each category based on significance if desired.
|
193 |
+
|
194 |
+
for cat in categories:
|
195 |
+
preds, raw_fairness, category_raw_score = compute_category_fairness(cat, params, baseline_log, exposure, df)
|
196 |
+
# Perform LRT to get overall significance for this attribute.
|
197 |
+
lrt_p_value = perform_lrt(cat, df)
|
198 |
+
|
199 |
+
# Compute multiplier based on significance.
|
200 |
+
# If p-value < 0.05, we penalize the fairness score proportionally.
|
201 |
+
multiplier = (lrt_p_value / 0.05) if lrt_p_value < 0.05 else 1.0
|
202 |
+
|
203 |
+
# Adjusted fairness score for the category:
|
204 |
+
adjusted_score = category_raw_score * multiplier
|
205 |
+
|
206 |
+
# Save results.
|
207 |
+
results[cat] = {
|
208 |
+
'Predicted Error Rates': preds,
|
209 |
+
'Raw Fairness Scores': raw_fairness,
|
210 |
+
# 'Weighted Raw Fairness Score': category_raw_score,
|
211 |
+
# 'LRT p-value': lrt_p_value,
|
212 |
+
'Adjusted Category Fairness Score': adjusted_score
|
213 |
+
}
|
214 |
+
|
215 |
+
# For overall score, we could weight categories (here we simply use the adjusted score).
|
216 |
+
adjusted_category_scores.append(adjusted_score)
|
217 |
+
# Optionally, use multiplier as a weight for overall aggregation.
|
218 |
+
weights_for_categories.append(multiplier)
|
219 |
+
|
220 |
+
# Compute overall fairness score across attributes using the adjusted category scores.
|
221 |
+
overall_fairness_score = np.average(adjusted_category_scores)
|
222 |
+
#FAAS is the Fairness Adjusted ASR Score based on which models will be ranked
|
223 |
+
faas = 10*np.log10(overall_fairness_score/avg_wer)
|
224 |
+
print("Fairness Adjusted ASR Score for the model is", faas)
|
225 |
+
# print("\nFinal Overall Fairness Score (Weighted Average over Categories):", overall_fairness_score) # used for summary_speedometer,Leaderboard
|
226 |
+
# print(results['gender'])
|
227 |
+
# print(results['gender']['Predicted Error Rates'])
|
228 |
+
# print(results['gender']['Adjusted Category Fairness Score'])
|
229 |
+
print("________________________________")
|
230 |
+
Results = {
|
231 |
+
'Predicted Error Rates': {
|
232 |
+
'gender': results['gender']['Predicted Error Rates'].to_dict(), # Convert Series to dict
|
233 |
+
'first_language': results['first_language']['Predicted Error Rates'].to_dict(),
|
234 |
+
'socioeconomic_bkgd': results['socioeconomic_bkgd']['Predicted Error Rates'].to_dict(),
|
235 |
+
'ethnicity': results['ethnicity']['Predicted Error Rates'].to_dict()
|
236 |
+
},
|
237 |
+
'Raw Fairness Scores': {
|
238 |
+
'gender': results['gender']['Raw Fairness Scores'].to_dict(),
|
239 |
+
'first_language': results['first_language']['Raw Fairness Scores'].to_dict(),
|
240 |
+
'socioeconomic_bkgd': results['socioeconomic_bkgd']['Raw Fairness Scores'].to_dict(),
|
241 |
+
'ethnicity': results['ethnicity']['Raw Fairness Scores'].to_dict()
|
242 |
+
},
|
243 |
+
'Adjusted Category Fairness Score': {
|
244 |
+
'gender': float(results['gender']['Adjusted Category Fairness Score']), # Convert NumPy float to Python float
|
245 |
+
'first_language': float(results['first_language']['Adjusted Category Fairness Score']),
|
246 |
+
'socioeconomic_bkgd': float(results['socioeconomic_bkgd']['Adjusted Category Fairness Score']),
|
247 |
+
'ethnicity': float(results['ethnicity']['Adjusted Category Fairness Score'])
|
248 |
+
},
|
249 |
+
'Overall Fairness Score': float(overall_fairness_score), # Convert NumPy float to Python float
|
250 |
+
'Avg_wer': float(avg_wer), # Convert NumPy float to Python float
|
251 |
+
'Avg_rtfx': float(avg_rtfx), # Convert NumPy float to Python float
|
252 |
+
'FAAS': float(faas), # Convert NumPy float to Python float
|
253 |
+
'ASR_model': ASR_model,
|
254 |
+
}
|
255 |
+
# print(Results)
|
256 |
+
return Results
|
utils/load_csv.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
from huggingface_hub import HfApi, hf_hub_download
|
3 |
+
import os
|
4 |
+
import io
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
# Load environment variables from .env file
|
8 |
+
# Executed once at import time, before any helper below is called, so that
# os.getenv("HF_TOKEN") in upload_csv can see values defined in a local .env
# file (presumably that is where the token lives — confirm for deployment).
load_dotenv()
|
9 |
+
|
10 |
+
# ASR_model = "openai/whisper-largev2" # Replace with your ASR model
|
11 |
+
# csv_path = "test.csv" #read from local
|
12 |
+
# csv_transcript = f"test_with_{ASR_model.replace("/","_")}.csv" # to save in dataset repo
|
13 |
+
# csv_result = f"test_with_{ASR_model.replace("/","_")}_WER.csv" # to save in dataset repo
|
14 |
+
|
15 |
+
# df = pd.read_csv(csv_path)
|
16 |
+
# print(f"CSV Loaded with {len(df)} rows")
|
17 |
+
|
18 |
+
def upload_csv(df, csv_filename):
    """Serialize ``df`` to CSV in memory and upload it to the results dataset repo.

    The frame is written (without its index) into an in-memory bytes buffer,
    which is then pushed to the ``satyamr196/asr_fairness_results`` dataset
    repository under the path ``csv_filename``. The Hub client is created
    with the token read from the ``HF_TOKEN`` environment variable.

    Args:
        df: Object exposing ``to_csv`` (a pandas DataFrame in practice).
        csv_filename: Destination path of the file inside the dataset repo.

    Returns:
        True if the upload call completed, False if creating the client or
        uploading raised any exception (the error is printed, not re-raised).

    Note:
        Serialization happens before the ``try`` block, so an error raised by
        ``df.to_csv`` propagates to the caller instead of returning False.
    """
    payload = io.BytesIO()
    df.to_csv(payload, index=False)
    # Rewind so the uploader reads the buffer from its first byte.
    payload.seek(0)

    upload_args = {
        "path_or_fileobj": payload,
        "path_in_repo": csv_filename,
        "repo_id": "satyamr196/asr_fairness_results",
        "repo_type": "dataset",
    }

    try:
        hub_client = HfApi(token=os.getenv("HF_TOKEN"))
        print(f"✅ CSV uploading : {csv_filename}")
        hub_client.upload_file(**upload_args)
    except Exception as err:
        print(f"⚠️ Could not upload CSV: {csv_filename} — {err}")
        return False
    else:
        return True
|
37 |
+
|
38 |
+
# upload_csv(df,f"test_with_{ASR_model.replace("/","_")}_WER.csv");
|
39 |
+
|
40 |
+
def download_csv(csv_filename):
    """Fetch a CSV from the results dataset repo and load it with pandas.

    Downloads ``csv_filename`` from the ``satyamr196/asr_fairness_results``
    dataset repository via ``hf_hub_download`` and parses the resulting local
    file with ``pd.read_csv``.

    Args:
        csv_filename: Path of the CSV file inside the dataset repo.

    Returns:
        The parsed DataFrame, or None if downloading or parsing raised any
        exception. Failures are swallowed without logging, so None is the
        only signal that the file could not be obtained.
    """
    try:
        local_file = hf_hub_download(
            repo_id="satyamr196/asr_fairness_results",
            filename=csv_filename,
            repo_type="dataset",
        )
        frame = pd.read_csv(local_file)
    except Exception:
        # print(f"⚠️ Could not load CSV: {csv_filename} — {e}")
        frame = None
    return frame
|
51 |
+
|
52 |
+
|
53 |
+
# # # Load the csv from the Hugging Face Hub
|
54 |
+
# df = download_csv(csv_result)
|
55 |
+
# if(df is None):
|
56 |
+
# print(f"CSV not found in the dataset repo. Please upload the file first.")
|
57 |
+
# else:
|
58 |
+
# print(f"CSV Loaded with {len(df)} rows")
|
59 |
+
|
60 |
+
# print(df)
|