File size: 4,125 Bytes
547836e
 
 
 
 
8e62829
547836e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from utils.load_csv import download_csv

# Display label -> value stored in the "gender" column (the labels returned to
# callers are capitalized, the values matched in the CSV are lowercase).
_GENDER_CATEGORIES = {"Male": "male", "Female": "female"}

# Values matched in the "socioeconomic_bkgd" column; label == value.
_SEG_CATEGORIES = ("Low", "Affluent", "Medium")

# Values matched in the "ethnicity" column; label == value.
_ETHNICITY_CATEGORIES = (
    "Asian, South Asian or Asian American",
    "Black or African American",
    "Hispanic, Latino, or Spanish",
    "Middle Eastern or North African",
    "Native American, American Indian, or Alaska Native",
    "Native Hawaiian or Other Pacific Islander",
    "White",
)

# Values matched in the "first_language" column; label == value.
# Each language appears exactly once: the previous dict literal listed "Hindi"
# and "Russian" twice, which only recomputed and overwrote the same entry.
_LANGUAGE_CATEGORIES = (
    "English",
    "German",
    "French",
    "Arabic",
    "Cantonese",
    "Creole",
    "Dutch",
    "English/Turkish",
    "Filipino",
    "Hindi",
    "Hmong",
    "Indonesian",
    "Italian",
    "Japanese",
    "Korean",
    "Laotian",
    "Malay",
    "Malaysian",
    "Mandarin",
    "Marathi",
    "Nepali",
    "Other",
    "Portuguese",
    "Russian",
    "Spanish",
    "Tagalog",
    "Turkish",
    "Ukrainian",
    "Urdu",
    "Vietnamese",
)


def _wer_by_category(df, column, categories):
    """Return ``{label: [WER, ...]}`` for the given categories of ``column``.

    ``categories`` is either a mapping of display label -> column value, or an
    iterable of values that double as their own labels. A category with no
    matching rows maps to an empty list.
    """
    if isinstance(categories, dict):
        pairs = categories.items()
    else:
        pairs = ((value, value) for value in categories)

    # Look the grouping column up once instead of once per category.
    group_values = df[column]
    return {
        label: df.loc[group_values == value, "WER"].tolist()
        for label, value in pairs
    }


def box_plot_data(ASR_model):
    """Collect the WER values of one ASR model's results, split by group.

    The results are loaded through ``download_csv`` from the file
    ``test_with_<ASR_model>_WER.csv``, where every ``/`` in ``ASR_model`` is
    replaced by ``_``. Column names are stripped of surrounding whitespace
    before use.

    Args:
        ASR_model: Model identifier string used to build the CSV file name.

    Returns:
        A 4-tuple ``(wer_Gender, wer_SEG, wer_Ethnicity, wer_Language)``.
        Each element is a dict mapping a category label to the list of
        ``WER`` values of the rows whose ``gender``, ``socioeconomic_bkgd``,
        ``ethnicity`` or ``first_language`` column, respectively, equals that
        category. Categories without matching rows map to ``[]``.

    Raises:
        KeyError: If one of the columns named above, or ``WER``, is missing
            from the loaded data.
    """
    # Load the CSV file
    csv_result = f'test_with_{ASR_model.replace("/","_")}_WER.csv'
    df = download_csv(csv_result)

    # Display the raw column names (before stripping) to help spot header issues
    print(df.columns)

    # Trim column names of any leading or trailing spaces
    df.columns = df.columns.str.strip()

    wer_Gender = _wer_by_category(df, "gender", _GENDER_CATEGORIES)
    wer_SEG = _wer_by_category(df, "socioeconomic_bkgd", _SEG_CATEGORIES)
    wer_Ethnicity = _wer_by_category(df, "ethnicity", _ETHNICITY_CATEGORIES)
    wer_Language = _wer_by_category(df, "first_language", _LANGUAGE_CATEGORIES)

    return wer_Gender, wer_SEG, wer_Ethnicity, wer_Language