Spaces:

satyamr196
/

ASR-FairBench-Server

Running

App Files Files Community

satyamr196 committed on May 14

Commit

547836e

1 Parent(s): fb5d66c

Major update: created all the required routes and added a utils folder containing helper functions

Browse files

Files changed (13) hide show

ASR_Server.py +133 -40
requirements.txt +2 -1
test.csv +1 -1
utils/__init__.py +0 -0
utils/__pycache__/__init__.cpython-313.pyc +0 -0
utils/__pycache__/generateResults.cpython-313.pyc +0 -0
utils/__pycache__/generate_box_plot.cpython-313.pyc +0 -0
utils/__pycache__/generate_results.cpython-313.pyc +0 -0
utils/__pycache__/load_csv.cpython-313.pyc +0 -0
utils/audio_duration.py +5 -0
utils/generate_box_plot.py +69 -0
utils/generate_results.py +256 -0
utils/load_csv.py +60 -0

ASR_Server.py CHANGED Viewed

@@ -1,12 +1,17 @@
-from flask import Flask, jsonify
 from datasets import load_dataset, Audio
 import pandas as pd
 import os
 import threading
-import os
 os.environ["HF_HOME"] = "/tmp/huggingface"
 #Check cpu score
 import timeit
 cpu_score = timeit.timeit("sum(range(1000000))", number=5)
@@ -15,14 +20,22 @@ print(f"🧠 CPU benchmark score: {cpu_score:.2f}")
 job_status = {
     "running": False,
     "model": None,
-    "completed": 0,
-    "message": "No job running"
 }
 csv_path = "test.csv"
-output_dir="data"
 df = pd.read_csv(csv_path)
 print(f"CSV Loaded with {len(df)} rows")
 def generateTranscript(ASR_model):
     import os
@@ -36,36 +49,25 @@ def generateTranscript(ASR_model):
         "running": True,
         "model": ASR_model,
         "completed": 0,
-        "message": "Starting transcription..."
     })
-    # Load dataset without decoding audio (required!)
-    dataset = load_dataset("satyamr196/asr_fairness_audio", split="train")
-    # dataset = dataset.with_format("python", decode_audio=False)
-    dataset = dataset.cast_column("audio", Audio(decode=False))
-    output_csv_path = os.path.join(output_dir, f"test_with_{ASR_model}.csv")
     # Check if transcript already exists
-    if os.path.exists(output_csv_path):
         print(f"Transcript already exists for model {ASR_model}. Skipping transcription.")
         return
-    # Load CSV
-    df = pd.read_csv(csv_path)
-    print(f"CSV Loaded with {len(df)} rows")
     total = len(df)
     job_status["total"] = total
-    # import torch
-    # # Check if GPU is available
-    # if torch.cuda.is_available():
-    #     device = 0
-    #     print("Device set to use GPU")
-    # else:
-    #     device = -1
-    #     print("Device set to use CPU")
     # Initialize ASR pipeline
     pipe = pipeline("automatic-speech-recognition", model=ASR_model)
@@ -79,7 +81,7 @@ def generateTranscript(ASR_model):
     # dataset = dataset.with_format("python", decode_audio=False)
     dataset_map = {
         os.path.basename(sample["audio"]["path"]).lower(): sample
-        for sample in dataset
     }
     transcripts = []
@@ -96,7 +98,9 @@ def generateTranscript(ASR_model):
                 audio_array, sample_rate = sf.read(file_path)
                 start_time = time.time()
-                result = pipe({"array": audio_array, "sampling_rate": sample_rate})
                 end_time = time.time()
                 transcript = result["text"]
@@ -106,7 +110,7 @@ def generateTranscript(ASR_model):
                 transcripts.append(transcript)
                 rtfx_score.append(rtfx)
-                print(f"✅ {filename}: RTFX = {rtfx:.2f}")
             except Exception as e:
                 print(f"❌ Error with {filename}: {e}")
@@ -125,22 +129,21 @@ def generateTranscript(ASR_model):
     df["transcript"] = transcripts
     df["rtfx"] = rtfx_score
-    os.makedirs(output_dir, exist_ok=True)
-    # Create the directory if it doesn't exist
-    csv_output_dir = os.path.dirname(output_csv_path) # Get the directory path
-    if not os.path.exists(csv_output_dir):  # Check if directory exists
-        os.makedirs(csv_output_dir)  # Create directory if it doesn't exist
-        print(f"Created directory: {csv_output_dir}")
-    df.to_csv(output_csv_path, index=False)
     job_status["running"] = False
     job_status["message"] = "Transcription completed."
-    print(f"\n📄 Transcripts saved to: {output_csv_path}")
 app = Flask(__name__)
 @app.route("/")
 def home():
@@ -165,8 +168,9 @@ def asr_models():
         "Fairseq S2T",
         "ESPnet"
     ]
     def background_job():
-        generateTranscript("openai/whisper-tiny")
     # Start the background job in a separate thread
     threading.Thread(target=background_job).start()
@@ -177,5 +181,94 @@ def asr_models():
 def get_status():
     return jsonify(job_status)
 # if __name__ == "__main__":
 #     app.run(debug=True)

+from flask import Flask, jsonify, request
+from flask_cors import CORS
 from datasets import load_dataset, Audio
 import pandas as pd
 import os
 import threading
+from dotenv import load_dotenv
+from utils.load_csv import upload_csv, download_csv
+from utils.generate_results import generateResults
+from utils.generate_box_plot import box_plot_data
+# Set the cache directory for Hugging Face datasets
 os.environ["HF_HOME"] = "/tmp/huggingface"
+ASR_model = "openai/whisper-tiny"  # Replace with your ASR model
 #Check cpu score
 import timeit
 cpu_score = timeit.timeit("sum(range(1000000))", number=5)
 job_status = {
     "running": False,
     "model": None,
+    "completed": None,
+    "%_completed" : None,
+    "message": "No Transcription in progress",
+    "total": None
 }
 csv_path = "test.csv"
+csv_transcript = f"test_with_{ASR_model.replace("/","_")}.csv"
+csv_result = f"test_with_{ASR_model.replace("/","_")}_WER.csv"
 df = pd.read_csv(csv_path)
 print(f"CSV Loaded with {len(df)} rows")
+# # Load dataset without decoding audio (required!)
+# dataset = load_dataset("satyamr196/asr_fairness_audio", split="train")
+# # dataset = dataset.with_format("python", decode_audio=False)
+# dataset = dataset.cast_column("audio", Audio(decode=False))
 def generateTranscript(ASR_model):
     import os
         "running": True,
         "model": ASR_model,
         "completed": 0,
+        "%_completed" : 0,
+        "message": "Starting transcription...",
+        "total": None
     })
     # Check if transcript already exists
+    df_transcript = download_csv(csv_transcript)
+    if(df_transcript is None):
+        print(f"CSV not found in the dataset repo. Proceeding to generate transcript.")
+    else:
         print(f"Transcript already exists for model {ASR_model}. Skipping transcription.")
         return
+    # # Load test.csv
+    # df = pd.read_csv(csv_path)
+    # print(f"CSV Loaded with {len(df)} rows")
     total = len(df)
     job_status["total"] = total
     # Initialize ASR pipeline
     pipe = pipeline("automatic-speech-recognition", model=ASR_model)
     # dataset = dataset.with_format("python", decode_audio=False)
     dataset_map = {
         os.path.basename(sample["audio"]["path"]).lower(): sample
+        # for sample in dataset #uncomment this line to use the dataset
     }
     transcripts = []
                 audio_array, sample_rate = sf.read(file_path)
                 start_time = time.time()
+                # result = pipe({"array": audio_array, "sampling_rate": sample_rate})
+                result = pipe({"array": audio_array, "sampling_rate": sample_rate},return_timestamps=True)
                 end_time = time.time()
                 transcript = result["text"]
                 transcripts.append(transcript)
                 rtfx_score.append(rtfx)
+                print(f"✅ {filename}: RTFX = {rtfx:.2f}, Progress: {(idx + 1) * 100 / total} %")
             except Exception as e:
                 print(f"❌ Error with {filename}: {e}")
     df["transcript"] = transcripts
     df["rtfx"] = rtfx_score
     job_status["running"] = False
     job_status["message"] = "Transcription completed."
+    # df.to_csv(csv_result, index=False)
+    upload_csv(df, csv_transcript)
+    print(f"\n📄 Transcripts saved to: {csv_transcript}")
+# generateTranscript(ASR_model)
+# print(generate_results(ASR_model))
+# print(box_plot_data(ASR_model))
+# ! FLASK SERVER CODE :-
 app = Flask(__name__)
+CORS(app,origins="*")
 @app.route("/")
 def home():
         "Fairseq S2T",
         "ESPnet"
     ]
     def background_job():
+        generateTranscript(ASR_model)
     # Start the background job in a separate thread
     threading.Thread(target=background_job).start()
 def get_status():
     return jsonify(job_status)
+@app.route('/api', methods=['GET'])
+def api():
+    model = request.args.get('ASR_model', default="", type=str)
+    csv_transcript = f"test_with_{model.replace("/","_")}.csv"
+    csv_result = f"test_with_{model.replace("/","_")}_WER.csv"
+    if not model:
+        return jsonify({'error': 'ASR_model parameter is required'}), 400  # Return 400 if model is missing
+    elif (download_csv(csv_transcript) is not None):
+        # Load the CSV file from the Hugging Face Hub
+        Results = generateResults(model)
+        wer_Gender, wer_SEG, wer_Ethnicity, wer_Language = box_plot_data(model)
+        return jsonify({
+            'message': f'{model} has been evaluated and results are shown below',
+            'endpoint': "/api",
+            'model': model,
+            'greet' : "Welcome to ASR-FairBench",
+            **Results,
+            'wer_Gender' : wer_Gender,
+            'wer_SEG' : wer_SEG,
+            'wer_Ethnicity' : wer_Ethnicity,
+            'wer_Language' : wer_Language
+        })
+    else:
+        # Check if `generateTranscript` is already running for this model
+        if job_status["running"] :
+            return jsonify({
+                'message': f'Transcription for {job_status["model"]} is in progress. Please wait for it to complete. Then submit your model again.',
+                'status': job_status
+            })
+        response = jsonify({
+            'message': f'Given Model {model} is being Evaluated, Please come back after a few hours and run the query again. Usually, it completes within an hour'
+        })
+        # Run `generateTranscript(model)` in a separate thread
+        # Start the transcript generation in a separate thread
+        # thread = threading.Thread(target=generateTranscript, args=(model,), daemon=True)
+        thread = threading.Thread(target=generateTranscript, args=(model,))
+        thread.start()
+        return response
+@app.route("/insert", methods=["POST"])
+def insert_document():
+    try:
+        data = request.json  # Get JSON data from request
+        model_name = data.get("Model")
+        csv_filename = "leaderboard.csv"
+        # Try to download the leaderboard CSV from HF dataset
+        df = download_csv(csv_filename)
+        if df is None:
+            # If not found, create a new DataFrame with this single entry
+            df = pd.DataFrame([data])
+        else:
+            # Check if the model already exists in leaderboard
+            if model_name in df["Model"].values:
+                return jsonify({"exists": True})
+            # Append the new row
+            df = pd.concat([df, pd.DataFrame([data])], ignore_index=True)
+        # Upload the updated CSV back to the Hugging Face dataset
+        success = upload_csv(df, csv_filename)
+        if not success:
+            return jsonify({"exists": "Error", "error": "Upload to Hugging Face failed"})
+        return jsonify({"exists": False, "message": "Data inserted into leaderboard successfully!"})
+    except Exception as e:
+        return jsonify({"exists": "Error", "error": str(e)})
+# Fetch all documents
+@app.route("/fetch", methods=["GET"])
+def fetch_documents():
+    try:
+        csv_filename = "leaderboard.csv"
+        df = download_csv(csv_filename)
+        if df is None:
+            return jsonify({"error": "Leaderboard CSV not found in Hugging Face dataset."})
+        documents = df.to_dict(orient="records")  # Convert DataFrame to list of dicts
+        return jsonify({"data": documents})
+    except Exception as e:
+        return jsonify({"error": str(e)})
 # if __name__ == "__main__":
 #     app.run(debug=True)

requirements.txt CHANGED Viewed

@@ -14,4 +14,5 @@ flask
 pymongo
 flask-cors
 pandas
-tqdm

 pymongo
 flask-cors
 pandas
+tqdm
+dotenv

test.csv CHANGED Viewed

@@ -1,4 +1,4 @@
- hash_name,transcription,age,gender,first_language,socioeconomic_bkgd,ethnicity,combined_column
 96ce0c2debfa8656fe16d30187d683df,hey facebook set my status uh to available,31 - 45,male,English,Low,"Native American, American Indian, or Alaska Native","male_English_Low_Native American, American Indian, or Alaska Native"
 f3d12b16dd637efb9ce8142632d63f34,send text message to wolfgang hey uncle wolfgang i really miss you hope all is well let's talk soon,46 - 65,male,German,Affluent,White,male_German_Affluent_White
 9859c9ab6ca0377d593ad245f39bc224,text john i am sorry about your loss,23 - 30,female,English,Low,Black or African American,female_English_Low_Black or African American

+hash_name,transcription,age,gender,first_language,socioeconomic_bkgd,ethnicity,combined_column
 96ce0c2debfa8656fe16d30187d683df,hey facebook set my status uh to available,31 - 45,male,English,Low,"Native American, American Indian, or Alaska Native","male_English_Low_Native American, American Indian, or Alaska Native"
 f3d12b16dd637efb9ce8142632d63f34,send text message to wolfgang hey uncle wolfgang i really miss you hope all is well let's talk soon,46 - 65,male,German,Affluent,White,male_German_Affluent_White
 9859c9ab6ca0377d593ad245f39bc224,text john i am sorry about your loss,23 - 30,female,English,Low,Black or African American,female_English_Low_Black or African American

utils/__init__.py ADDED Viewed

File without changes

utils/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (145 Bytes). View file

utils/__pycache__/generateResults.cpython-313.pyc ADDED Viewed

Binary file (10.1 kB). View file

utils/__pycache__/generate_box_plot.cpython-313.pyc ADDED Viewed

Binary file (5.48 kB). View file

utils/__pycache__/generate_results.cpython-313.pyc ADDED Viewed

Binary file (10.1 kB). View file

utils/__pycache__/load_csv.cpython-313.pyc ADDED Viewed

Binary file (1.61 kB). View file

utils/audio_duration.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from pydub import AudioSegment
+#The audio duration calculation
+def get_audio_duration(audio_file):
+    audio = AudioSegment.from_file(audio_file)
+    return len(audio) / 1000  # Convert milliseconds to seconds

utils/generate_box_plot.py ADDED Viewed

	@@ -0,0 +1,69 @@

+from utils.load_csv import download_csv
+def box_plot_data(ASR_model):
+    # Load the CSV file
+    csv_result = f"test_with_{ASR_model.replace("/","_")}_WER.csv"
+    df = download_csv(csv_result)
+    # Display actual column names to check for issues
+    print(df.columns)
+    # Trim column names of any leading or trailing spaces
+    df.columns = df.columns.str.strip()
+    # Extract WER values for Male and Female
+    wer_Gender = {
+        "Male": df[df["gender"] == "male"]["WER"].tolist(),
+        "Female": df[df["gender"] == "female"]["WER"].tolist()
+    }
+    wer_SEG = {
+        "Low": df[df["socioeconomic_bkgd"] == "Low"]["WER"].tolist(),
+        "Affluent": df[df["socioeconomic_bkgd"] == "Affluent"]["WER"].tolist(),
+        "Medium": df[df["socioeconomic_bkgd"] == "Medium"]["WER"].tolist(),
+    }
+    wer_Ethnicity = {
+        "Asian, South Asian or Asian American": df[df["ethnicity"] == "Asian, South Asian or Asian American"]["WER"].tolist(),
+        "Black or African American": df[df["ethnicity"] == "Black or African American"]["WER"].tolist(),
+        "Hispanic, Latino, or Spanish": df[df["ethnicity"] == "Hispanic, Latino, or Spanish"]["WER"].tolist(),
+        "Middle Eastern or North African": df[df["ethnicity"] == "Middle Eastern or North African"]["WER"].tolist(),
+        "Native American, American Indian, or Alaska Native": df[df["ethnicity"] == "Native American, American Indian, or Alaska Native"]["WER"].tolist(),
+        "Native Hawaiian or Other Pacific Islander": df[df["ethnicity"] == "Native Hawaiian or Other Pacific Islander"]["WER"].tolist(),
+        "White": df[df["ethnicity"] == "White"]["WER"].tolist(),
+    }
+    wer_Language = {
+        "English": df[df["first_language"] == "English"]["WER"].tolist(),
+        "German": df[df["first_language"] == "German"]["WER"].tolist(),
+        "French": df[df["first_language"] == "French"]["WER"].tolist(),
+        "Arabic": df[df["first_language"] == "Arabic"]["WER"].tolist(),
+        "Cantonese": df[df["first_language"] == "Cantonese"]["WER"].tolist(),
+        "Creole": df[df["first_language"] == "Creole"]["WER"].tolist(),
+        "Dutch": df[df["first_language"] == "Dutch"]["WER"].tolist(),
+        "English/Turkish": df[df["first_language"] == "English/Turkish"]["WER"].tolist(),
+        "Filipino": df[df["first_language"] == "Filipino"]["WER"].tolist(),
+        "Hindi": df[df["first_language"] == "Hindi"]["WER"].tolist(),
+        "Hmong": df[df["first_language"] == "Hmong"]["WER"].tolist(),
+        "Hindi": df[df["first_language"] == "Hindi"]["WER"].tolist(),
+        "Indonesian": df[df["first_language"] == "Indonesian"]["WER"].tolist(),
+        "Italian": df[df["first_language"] == "Italian"]["WER"].tolist(),
+        "Japanese": df[df["first_language"] == "Japanese"]["WER"].tolist(),
+        "Korean": df[df["first_language"] == "Korean"]["WER"].tolist(),
+        "Laotian": df[df["first_language"] == "Laotian"]["WER"].tolist(),
+        "Malay": df[df["first_language"] == "Malay"]["WER"].tolist(),
+        "Malaysian": df[df["first_language"] == "Malaysian"]["WER"].tolist(),
+        "Mandarin": df[df["first_language"] == "Mandarin"]["WER"].tolist(),
+        "Marathi": df[df["first_language"] == "Marathi"]["WER"].tolist(),
+        "Nepali": df[df["first_language"] == "Nepali"]["WER"].tolist(),
+        "Other": df[df["first_language"] == "Other"]["WER"].tolist(),
+        "Portuguese": df[df["first_language"] == "Portuguese"]["WER"].tolist(),
+        "Russian": df[df["first_language"] == "Russian"]["WER"].tolist(),
+        "Spanish": df[df["first_language"] == "Spanish"]["WER"].tolist(),
+        "Tagalog": df[df["first_language"] == "Tagalog"]["WER"].tolist(),
+        "Turkish": df[df["first_language"] == "Turkish"]["WER"].tolist(),
+        "Russian": df[df["first_language"] == "Russian"]["WER"].tolist(),
+        "Ukrainian": df[df["first_language"] == "Ukrainian"]["WER"].tolist(),
+        "Urdu": df[df["first_language"] == "Urdu"]["WER"].tolist(),
+        "Vietnamese": df[df["first_language"] == "Vietnamese"]["WER"].tolist(),
+    }
+    return wer_Gender, wer_SEG, wer_Ethnicity, wer_Language

utils/generate_results.py ADDED Viewed

	@@ -0,0 +1,256 @@

+import pandas as pd
+import re
+import numpy as np
+from jiwer import wer
+import statsmodels.api as sm
+import statsmodels.formula.api as smf
+from utils.load_csv import download_csv, upload_csv
+def generateResults(ASR_model):
+    # Define normalization function
+    def normalize_text(text):
+        """
+        Normalize text by converting to lowercase, removing special characters,
+        except digits, and handling None or float values.
+        """
+        if text is None or pd.isna(text):  # Check for None or NaN
+            return ""
+        if isinstance(text, float):  # Check for floats and convert them to empty string
+            return ""
+        text = text.lower()  # Convert to lowercase
+        text = re.sub(r'[^a-z0-9\s]', '', text)  # Keep only letters, digits, and spaces
+        return text.strip()  # Remove leading/trailing spaces
+    # Load the CSV with whisper transcripts f"test_with_{ASR_model}.csv"
+    csv_transcript = f"test_with_{ASR_model.replace("/","_")}.csv"
+    # Read the CSV file
+    df = download_csv(csv_transcript)
+    if(df is None):
+        print(f"CSV not found in the dataset repo. Please generate the transcript file first.")
+        return
+    # Normalize original text and whisper transcripts
+    df['normalized_transcription'] = df[df.columns[1]].apply(normalize_text)  # Replace 'original_text' with your column name
+    # Check if whisper transcript column exists
+    if 'transcript' in df.columns:
+        df['normalized_transcript'] = df[df.columns[8]].apply(normalize_text)
+        # Calculate WER
+        wer_scores = []
+        for index, row in df.iterrows():
+            original = row['normalized_transcription']
+            transcript = row['normalized_transcript']
+            if original and transcript:
+                wer_score = wer(original, transcript)
+            else:
+                wer_score = 1.0  # Maximum error if one text is missing
+            wer_scores.append(wer_score)
+        df['WER'] = wer_scores
+        # Compute IQR
+        Q1 = df['WER'].quantile(0.25)
+        Q3 = df['WER'].quantile(0.75)
+        IQR = Q3 - Q1
+        # Define outlier range
+        lower_bound = Q1 - 1.5 * IQR
+        upper_bound = Q3 + 1.5 * IQR
+        # Remove outliers
+        df = df[(df['WER'] >= lower_bound) & (df['WER'] <= upper_bound)]
+    else:
+        print("Column 'transcript' not found in CSV")
+    # Save the updated CSV
+    csv_result = f"test_with_{ASR_model.replace("/","_")}_WER.csv"
+    upload_csv(df,csv_result)
+    print(f"WER calculations saved to {csv_result}")
+    avg_wer = df["WER"].mean()
+    avg_rtfx = df["rtfx"].mean()
+    print(f"Average WER: {avg_wer} and Avg RTFX : {avg_rtfx}")
+    #----------------------------------------------------------------------------------------------------------
+    #----------------------------------------------------------------------------------------------------------
+    # Define protected attributes and label columns
+    protected_attributes = ['gender', 'first_language', 'socioeconomic_bkgd', 'ethnicity']
+    label_column = 'normalized_transcription'
+    prediction_column = 'normalized_transcript'
+    wer_column = 'WER'
+    data = df
+    # Function to calculate WER disparity
+    def calculate_wer_disparity(data, protected_attribute, wer_column):
+        groups = data[protected_attribute].unique()
+        wer_disparity = {}
+        for group in groups:
+            group_data = data[data[protected_attribute] == group]
+            avg_wer = group_data[wer_column].mean()
+            wer_disparity[group] = avg_wer
+        return wer_disparity
+    # Calculate WER disparity for each protected attribute
+    for attribute in protected_attributes:
+        disparity = calculate_wer_disparity(data, attribute, wer_column)
+        print(f"WER Disparity for {attribute}:", disparity)
+    #-------------------------------------------------------------------------------------------------------
+    #-------------------------------------------------------------------------------------------------------
+    data["Reference_words"] = data["normalized_transcription"].str.split().str.len()
+    # Compute word error count (WER_count)
+    data["WER_count"] = data["Reference_words"] * data["WER"]
+    df = data
+    categorical_cols = ['gender', 'first_language', 'socioeconomic_bkgd', 'ethnicity']
+    for col in categorical_cols:
+        df[col] = df[col].astype("category")
+    # Offset: log of reference word count (to adjust for different transcript lengths)
+    df["log_Ref_Words"] = np.log(df["Reference_words"] + 1)  # Adding 1 to avoid log(0)
+    # Fit a Mixed-Effects Poisson Regression Model
+    mixed_model = smf.mixedlm(
+        formula="WER_count ~ log_Ref_Words + age + gender + first_language + socioeconomic_bkgd + ethnicity",  # Fixed effects
+        data=df,
+        groups=df["combined_column"]  # Random effect on speaker
+    ).fit()
+    # Display results
+    # print(mixed_model.summary())
+    #--------------------------------------------------------------------------------------------------------------------------
+    #--------------------------------------------------------------------------------------------------------------------------
+    from scipy.stats import chi2
+    # Assume 'mixed_model' is your already-fitted mixed-effects model and 'df' is your DataFrame.
+    # Also assume df["log_Ref_Words"] = np.log(df["Reference_words"] + 1)
+    params = mixed_model.params
+    # Set fixed values for continuous predictors:
+    fixed_log_ref = df["log_Ref_Words"].mean()
+    baseline_log = params["Intercept"] + params["log_Ref_Words"] * fixed_log_ref
+    exposure = np.exp(fixed_log_ref) - 1
+    def compute_predicted_error_rate(category, level, params, baseline_log, exposure):
+        """Computes the predicted WER (error rate) for a given level of a demographic attribute."""
+        coef_name = f"{category}[T.{level}]"
+        effect = params.get(coef_name, 0)  # For the baseline level, effect is 0.
+        pred_log = baseline_log + effect
+        pred_count = np.exp(pred_log)
+        return pred_count / exposure
+    def compute_category_fairness(category, params, baseline_log, exposure, df):
+        """
+        For a given category, compute:
+        - Predicted error rates for each subgroup level.
+        - Raw fairness scores (0-100 scale: 100 = best, 0 = worst) based on linear scaling.
+        - A weighted category fairness score using group proportions.
+        """
+        levels = df[category].cat.categories
+        predictions = {}
+        for lvl in levels:
+            predictions[lvl] = compute_predicted_error_rate(category, lvl, params, baseline_log, exposure)
+        # Convert predictions to a Series.
+        pred_series = pd.Series(predictions)
+        min_pred, max_pred = pred_series.min(), pred_series.max()
+        # Compute raw fairness scores: if all levels are identical, assign 100 to everyone.
+        if max_pred == min_pred:
+            raw_fairness = pred_series.apply(lambda x: 100.0)
+        else:
+            raw_fairness = pred_series.apply(lambda x: 100 * (1 - (x - min_pred) / (max_pred - min_pred)))
+        # Weight the subgroup fairness scores by their sample proportions in the dataset.
+        group_proportions = df[category].value_counts(normalize=True)
+        # Ensure ordering matches the fairness scores index:
+        group_proportions = group_proportions.reindex(raw_fairness.index, fill_value=0)
+        weighted_category_fairness = np.average(raw_fairness, weights=group_proportions)
+        return pred_series, raw_fairness, weighted_category_fairness
+    def perform_lrt(attribute, df):
+        """Performs Likelihood Ratio Test (LRT) to test the overall significance of an attribute."""
+        full_model = smf.mixedlm(f"WER ~ {attribute} + log_Ref_Words", df, groups=df["combined_column"]).fit()
+        reduced_model = smf.mixedlm("WER ~ log_Ref_Words", df, groups=df["combined_column"]).fit()
+        lr_stat = 2 * (full_model.llf - reduced_model.llf)
+        df_diff = full_model.df_modelwc - reduced_model.df_modelwc
+        p_value = chi2.sf(lr_stat, df_diff)
+        return p_value
+    # List of attributes to evaluate
+    categories = ['gender', 'first_language', 'socioeconomic_bkgd', 'ethnicity']
+    results = {}
+    adjusted_category_scores = []  # To store adjusted fairness scores for each category.
+    weights_for_categories = []    # Weight each category based on significance if desired.
+    for cat in categories:
+        preds, raw_fairness, category_raw_score = compute_category_fairness(cat, params, baseline_log, exposure, df)
+        # Perform LRT to get overall significance for this attribute.
+        lrt_p_value = perform_lrt(cat, df)
+        # Compute multiplier based on significance.
+        # If p-value < 0.05, we penalize the fairness score proportionally.
+        multiplier = (lrt_p_value / 0.05) if lrt_p_value < 0.05 else 1.0
+        # Adjusted fairness score for the category:
+        adjusted_score = category_raw_score * multiplier
+        # Save results.
+        results[cat] = {
+            'Predicted Error Rates': preds,
+            'Raw Fairness Scores': raw_fairness,
+            # 'Weighted Raw Fairness Score': category_raw_score,
+            # 'LRT p-value': lrt_p_value,
+            'Adjusted Category Fairness Score': adjusted_score
+        }
+        # For overall score, we could weight categories (here we simply use the adjusted score).
+        adjusted_category_scores.append(adjusted_score)
+        # Optionally, use multiplier as a weight for overall aggregation.
+        weights_for_categories.append(multiplier)
+    # Compute overall fairness score across attributes using the adjusted category scores.
+    overall_fairness_score = np.average(adjusted_category_scores)
+    #FAAS is the Fairness Adjusted ASR Score based on which models will be ranked
+    faas = 10*np.log10(overall_fairness_score/avg_wer)
+    print("Fairness Adjusted ASR Score for the model is", faas)
+    # print("\nFinal Overall Fairness Score (Weighted Average over Categories):", overall_fairness_score) #  used for summary_speedometer,Leaderboard
+    # print(results['gender'])
+    # print(results['gender']['Predicted Error Rates'])
+    # print(results['gender']['Adjusted Category Fairness Score'])
+    print("________________________________")
+    Results = {
+        'Predicted Error Rates': {
+            'gender': results['gender']['Predicted Error Rates'].to_dict(),  # Convert Series to dict
+            'first_language': results['first_language']['Predicted Error Rates'].to_dict(),
+            'socioeconomic_bkgd': results['socioeconomic_bkgd']['Predicted Error Rates'].to_dict(),
+            'ethnicity': results['ethnicity']['Predicted Error Rates'].to_dict()
+        },
+        'Raw Fairness Scores': {
+            'gender': results['gender']['Raw Fairness Scores'].to_dict(),
+            'first_language': results['first_language']['Raw Fairness Scores'].to_dict(),
+            'socioeconomic_bkgd': results['socioeconomic_bkgd']['Raw Fairness Scores'].to_dict(),
+            'ethnicity': results['ethnicity']['Raw Fairness Scores'].to_dict()
+        },
+        'Adjusted Category Fairness Score': {
+            'gender': float(results['gender']['Adjusted Category Fairness Score']),  # Convert NumPy float to Python float
+            'first_language': float(results['first_language']['Adjusted Category Fairness Score']),
+            'socioeconomic_bkgd': float(results['socioeconomic_bkgd']['Adjusted Category Fairness Score']),
+            'ethnicity': float(results['ethnicity']['Adjusted Category Fairness Score'])
+        },
+        'Overall Fairness Score': float(overall_fairness_score),  # Convert NumPy float to Python float
+        'Avg_wer': float(avg_wer),  # Convert NumPy float to Python float
+        'Avg_rtfx': float(avg_rtfx),  # Convert NumPy float to Python float
+        'FAAS': float(faas),  # Convert NumPy float to Python float
+        'ASR_model': ASR_model,
+    }
+    # print(Results)
+    return Results

utils/load_csv.py ADDED Viewed

	@@ -0,0 +1,60 @@

+from dotenv import load_dotenv
+from huggingface_hub import HfApi, hf_hub_download
+import os
+import io
+import pandas as pd
+# Load environment variables from .env file
+load_dotenv()
+# ASR_model = "openai/whisper-largev2"  # Replace with your ASR model
+# csv_path = "test.csv" #read from local
+# csv_transcript = f"test_with_{ASR_model.replace("/","_")}.csv" # to save in dataset repo
+# csv_result = f"test_with_{ASR_model.replace("/","_")}_WER.csv" # to save in dataset repo
+# df = pd.read_csv(csv_path)
+# print(f"CSV Loaded with {len(df)} rows")
+def upload_csv(df,csv_filename):
+    csv_buffer = io.BytesIO()
+    df.to_csv(csv_buffer, index=False)
+    csv_buffer.seek(0)
+    try:
+        # Upload the generated csv to Hugging Face Hub
+        api = HfApi(token=os.getenv("HF_TOKEN"))
+        print(f"✅ CSV uploading : {csv_filename}")
+        api.upload_file(
+            path_or_fileobj=csv_buffer,
+            path_in_repo=csv_filename,
+            repo_id="satyamr196/asr_fairness_results",
+            repo_type="dataset"
+        )
+        return True
+    except Exception as e:
+        print(f"⚠️ Could not upload CSV: {csv_filename} — {e}")
+        return False
+# upload_csv(df,f"test_with_{ASR_model.replace("/","_")}_WER.csv");
+def download_csv(csv_filename):
+    repo_id = "satyamr196/asr_fairness_results"
+    try:
+        # Download the CSV file from the dataset repo
+        csv_path = hf_hub_download(repo_id=repo_id, filename=csv_filename, repo_type="dataset")
+        # Load into pandas
+        return pd.read_csv(csv_path)
+    except Exception as e:
+        # print(f"⚠️ Could not load CSV: {csv_filename} — {e}")
+        return None
+# # # Load the csv from the Hugging Face Hub
+# df = download_csv(csv_result)
+# if(df is None):
+#     print(f"CSV not found in the dataset repo. Please upload the file first.")
+# else:
+#     print(f"CSV Loaded with {len(df)} rows")
+# print(df)