Commit 9fc4854
Parent: 7d85866

Clean up the code and add ABOUT

Changed files:
- app.py                  +30 -87
- pages/about.md          +17 -0
- pages/description.html  +11 -0
- pages/emotions.md        +1 -0
- pages/features.md        +1 -0
- pages/overall.md         +1 -0
- pages/submit.md         +13 -0
app.py
CHANGED

@@ -9,6 +9,13 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 
+CITATION_TEXT = r"""@misc{cloneval,
+    author={Christop, Iwona and Kuczyński, Tomasz and Kubis, Marek},
+    title={{ClonEval: An Open Voice Cloning Benchmark}},
+    year={2025},
+}"""
+
+
 app = gr.Blocks(css=custom_css)
 
 with app:
@@ -24,39 +31,16 @@ with app:
     """
 
     # Title and Description of the Leaderboard
-    gr.HTML("""
-    <style>
-        @import url('https://fonts.googleapis.com/css2?family=Ubuntu:wght@400;700&display=swap');
-    </style>
-    <h1 style="text-align: center; font-family: 'Ubuntu', sans-serif; font-size: 36px; color: #002d69;">
-        Open Voice Cloning Leaderboard
-    </h1>
-    <p style="text-align:center; font-size: 15px; width: 85%; margin: 0 auto;">
-        The <b>Open Voice Cloning Leaderboard</b> ranks and evaluates the voice cloning models across
-        diverse datasets, including emotional speech.<br>It also delivers an in-depth analysis of how
-        different acoustic features shape the final results.
-    </p>
-    """)
-
+    gr.HTML(open("pages/description.html", "r").read())
 
-    """
-    ============
-    Leaderboard
-    ============
-    """
+    # LEADERBOARD
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 Leaderboard", elem_id="Leaderboard", id=0, elem_classes="tab-item"):
 
-            '''
-            ========
-            Overall
-            ========
-            '''
+            # OVERALL
             with gr.TabItem("Overall", elem_id="Overall", id=1, elem_classes="tab-item"):
-                gr.Markdown("""
-                The results represent the cosine similarity between the speaker embeddings
-                of the original and cloned samples, generated by the WavLM model.
-                """)
+                gr.Markdown(open("pages/overall.md", "r").read())
+
                 # Create and display leaderboard table
                 leaderboard_dataframe = leaderboard.create_leaderboard_data('All', 'wavlm', 'emotion')
                 leaderboard_table = gr.DataFrame(leaderboard_dataframe,
@@ -64,17 +48,9 @@ with app:
                                                  interactive=False,
                                                  )
 
-            '''
-            =========
-            Emotions
-            =========
-            '''
+            # EMOTIONS
             with gr.TabItem("Emotions", elem_id="Emotions", id=2, elem_classes="tab-item"):
-                gr.Markdown("""
-                The results represent the cosine similarity between the speaker embeddings
-                of the original and cloned samples, generated by the WavLM model. The values
-                can be filtered by dataset or emotional state.
-                """)
+                gr.Markdown(open("pages/emotions.md", "r").read())
 
                 # UI for selecting dataset and emotion options
                 with gr.Row():
@@ -109,17 +85,9 @@ with app:
                 )
 
 
-            '''
-            =========
-            Features
-            =========
-            '''
+            # FEATURES
             with gr.TabItem("Features", elem_id="Features", id=3, elem_classes="tab-item"):
-                gr.Markdown("""
-                The results represent the cosine similarity between the values of selected
-                acoustic features of the original and cloned samples. The values
-                can be filtered by dataset or emotional state.
-                """)
+                gr.Markdown(open("pages/features.md", "r").read())
 
                 # UI for selecting dataset, emotion, and feature options
                 with gr.Row():
@@ -166,48 +134,23 @@ with app:
                     [leaderboard_table]
                 )
 
-
-        '''
-        ======
-        About
-        ======
-        '''
+        # ABOUT
         with gr.TabItem("About", elem_id="About", id=4):
-            gr.Markdown(
-
+            gr.Markdown(open("pages/about.md", "r").read())
 
-        '''
-        =============
-        Submit here!
-        =============
-        '''
+        # SUBMIT HERE
         with gr.TabItem("Submit here! ", elem_id="Submit", id=5):
-
-
-
-
-
-
-
-
-
-
-
-            )
-            with gr.Row():
-                gr.Markdown(
-                    """
-                    <div style="background: linear-gradient(135deg, #007B83, #2E2E2E); padding:1.5rem; border-radius:10px; color:#EEEEEE; font-size:1rem; line-height:1.8;">
-                    <h2><b>How to Submit Your Model:</b></h2>
-                    <div style="margin-left:1rem;">
-                    <p style="margin-bottom:1rem;">✉️ <b>Step 1:</b> Send an email to <b><a href="mailto:cloneval@csi.wmi.amu.edu.pl" style="color:#FFD369; text-decoration:none;" onmouseover="this.style.color='#FFF'" onmouseout="this.style.color='#FFD369'">cloneval@csi.wmi.amu.edu.pl</a></b>.</p>
-                    <p style="margin-bottom:1rem;"><b>Step 2:</b> Include the link to your voice cloning model.</p>
-                    <p style="margin-bottom:1rem;"><b>Step 3:</b> Once evaluated, your model will join the leaderboard.</p>
-                    </div>
-                    <p style="margin-top:1rem; font-style:italic; text-align:center;">Thanks for sharing your work with us and making this project even better!</p>
-                    </div>
-                    """
-            )
+            gr.Markdown(open("pages/submit.md", "r").read())
+
+    with gr.Column():
+        with gr.Accordion("Citation", open=False):
+            citation_button = gr.Textbox(
+                label="",
+                value=CITATION_TEXT,
+                lines=5,
+                elem_id="citation-button",
+                show_copy_button=True,
+            )
 
 
 scheduler = BackgroundScheduler()
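The refactor follows one pattern throughout: static copy is moved out of app.py into the pages/ directory and read into Gradio components when the interface is built. A minimal, self-contained sketch of that pattern is shown below for reference; the load_page helper and the reduced two-tab layout are illustrative assumptions, not code from this commit.

```python
from pathlib import Path

import gradio as gr

PAGES_DIR = Path("pages")  # assumes the same directory layout as in this commit


def load_page(name: str) -> str:
    """Read a static page (Markdown or HTML) shipped alongside the app."""
    return (PAGES_DIR / name).read_text(encoding="utf-8")


with gr.Blocks() as demo:
    # HTML header, analogous to pages/description.html
    gr.HTML(load_page("description.html"))

    with gr.Tabs():
        with gr.TabItem("About"):
            # Markdown body, analogous to pages/about.md
            gr.Markdown(load_page("about.md"))
        with gr.TabItem("Submit here!"):
            gr.Markdown(load_page("submit.md"))

if __name__ == "__main__":
    demo.launch()
```

Reading the files once at startup keeps the layout code in app.py short, while the copy can be edited without touching any Python.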
pages/about.md
ADDED

@@ -0,0 +1,17 @@
+# About
+
+The **Open Voice Cloning Leaderboard** is part of the **ClonEval** benchmark. In addition to the Leaderboard, the benchmark consists of:
+- a deterministic evaluation protocol that sets defaults for the data, metrics, and models used in the voice cloning assessment process,
+- an open-source software library that can be used to evaluate voice cloning models in a reproducible manner.
+
+## Evaluation Procedure
+
+The evaluation procedure involves two stages. First, samples are generated using a voice cloning model. The model must take as input a sample of the voice to be cloned and the text of an utterance.
+
+Once the samples have been generated, speaker embeddings are obtained with the [**WavLM**](https://huggingface.co/microsoft/wavlm-base-plus-sv) model. For each pair of samples (reference and generated), the cosine similarity between their WavLM speaker embeddings, and between the values of the acoustic features extracted from the samples, is calculated. The similarity values obtained on all samples from a given dataset are averaged to obtain the final evaluation result.
+
+For fine-grained error analysis, we also extract acoustic features from each sample with Librosa.
+
+## Software Library
+
+The code for the evaluation procedure is available in the GitHub repository ([here](https://github.com/amu-cai/cloneval)).
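For reference, the WavLM comparison described above boils down to the following sketch. It follows the published usage of the microsoft/wavlm-base-plus-sv checkpoint in the transformers library and assumes two 16 kHz mono waveforms already loaded as float arrays; it is an illustration of the procedure, not code taken from the ClonEval repository.

```python
import torch
from transformers import Wav2Vec2FeatureExtractor, WavLMForXVector

# Speaker-verification variant of WavLM referenced on the About page.
extractor = Wav2Vec2FeatureExtractor.from_pretrained("microsoft/wavlm-base-plus-sv")
model = WavLMForXVector.from_pretrained("microsoft/wavlm-base-plus-sv")


def speaker_similarity(reference_audio, cloned_audio, sampling_rate: int = 16000) -> float:
    """Cosine similarity between the speaker embeddings of two waveforms."""
    inputs = extractor(
        [reference_audio, cloned_audio],
        sampling_rate=sampling_rate,
        return_tensors="pt",
        padding=True,
    )
    with torch.no_grad():
        embeddings = model(**inputs).embeddings  # shape: (2, embedding_dim)
    return torch.nn.functional.cosine_similarity(
        embeddings[0], embeddings[1], dim=-1
    ).item()
```

As described above, this per-pair similarity is then averaged over all samples of a dataset to obtain the reported score.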
pages/description.html
ADDED

@@ -0,0 +1,11 @@
+<style>
+    @import url('https://fonts.googleapis.com/css2?family=Ubuntu:wght@400;700&display=swap');
+</style>
+<h1 style="text-align: center; font-family: 'Ubuntu', sans-serif; font-size: 36px; color: #002d69;">
+    Open Voice Cloning Leaderboard
+</h1>
+<p style="text-align:center; font-size: 15px; width: 85%; margin: 0 auto;">
+    The <b>Open Voice Cloning Leaderboard</b> ranks and evaluates the voice cloning models across
+    diverse datasets, including emotional speech.<br>It also delivers an in-depth analysis of how
+    different acoustic features shape the final results.
+</p>
pages/emotions.md
ADDED

@@ -0,0 +1 @@
+The results represent the cosine similarity between the speaker embeddings of the original and cloned samples, generated by the WavLM model. The values can be filtered by dataset or emotional state.
pages/features.md
ADDED

@@ -0,0 +1 @@
+The results represent the cosine similarity between the values of selected acoustic features of the original and cloned samples. The values can be filtered by dataset or emotional state.
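To make the acoustic-feature comparison concrete, the sketch below extracts a few frame-level features with librosa and compares the resulting vectors with cosine similarity. The particular feature set (RMS energy, spectral centroid, zero-crossing rate) is a hypothetical choice for illustration, not the list used by the leaderboard.

```python
import librosa
import numpy as np


def acoustic_features(path: str, sr: int = 16000) -> np.ndarray:
    """Mean values of a few frame-level acoustic features for one recording."""
    y, sr = librosa.load(path, sr=sr, mono=True)
    return np.array([
        librosa.feature.rms(y=y).mean(),                       # energy
        librosa.feature.spectral_centroid(y=y, sr=sr).mean(),  # spectral brightness
        librosa.feature.zero_crossing_rate(y).mean(),          # noisiness
    ])


def feature_similarity(reference_path: str, cloned_path: str) -> float:
    """Cosine similarity between the acoustic feature vectors of two recordings."""
    a = acoustic_features(reference_path)
    b = acoustic_features(cloned_path)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
```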
pages/overall.md
ADDED

@@ -0,0 +1 @@
+The results represent the cosine similarity between the speaker embeddings of the original and cloned samples, generated by the WavLM model.
pages/submit.md
ADDED

@@ -0,0 +1,13 @@
+# ✉️✨ Submit Your Model Here! ✨✉️
+
+Help us improve the leaderboard by submitting your voice cloning model.
+
+## How to Submit Your Model:
+
+✉️ **Step 1:** Send an email to [**cloneval@csi.wmi.amu.edu.pl**](mailto:cloneval@csi.wmi.amu.edu.pl).
+
+**Step 2:** Include the link to your voice cloning model.
+
+**Step 3:** Once evaluated, your model will join the leaderboard.
+
+Thanks for sharing your work with us and making this project even better!