Liam Pond committed on
Commit
9062c1f
·
1 Parent(s): c79c7f8

smarter phonemizer

Browse files
app.py CHANGED
@@ -10,6 +10,8 @@ import requests
10
  import zipfile
11
  import streamlit.components.v1 as components
12
  from pathlib import Path
 
 
13
 
14
  def patch_config_yaml_files():
15
  root = "/tmp/cantussvs_v1"
@@ -174,87 +176,87 @@ full_phoneme_list_display = [phoneme_display_map.get(p, p) for p in permitted_ph
174
  # Pitch list D4-D5
175
  allowed_pitches = ["D4", "D#4", "E4", "F4", "F#4", "G4", "G#4", "A4", "A#4", "B4", "C5", "C#5", "D5"]
176
 
177
- # # Title
178
- # st.title("CantusSVS: Latin Singing Voice Synthesis")
179
 
180
- # st.markdown("""
181
- # # About CantusSVS
182
 
183
- # <p>CantusSVS is a web-based Singing Voice Synthesis (SVS) system designed for composers and musicians to synthesize Latin chant audio from a custom musical score.
184
- # Built on top of the DiffSinger AI model, CantusSVS enables detailed, precise control over melody, rhythm, phonemes, and timing without any programming knowledge required.</p>
185
 
186
- # <p>Designed by Liam Pond as the final project for MUS6329X: Projet en informatique musicale (Prof. Dominic Thibault) at the Université de Montréal.
187
- # You can view this project's GitHub repository [here](https://github.com/liampond/CantusSVS).</p>
188
 
189
- # ---
190
 
191
- # # How to Use CantusSVS
192
 
193
- # ## 1. Compose Your Music
194
 
195
- # Compose the chant you want to synthesize using the notation software of your choice. [MuseScore 4](https://musescore.org/en/download) is recommended.
196
- # The chant must adhere to the following conditions:
197
 
198
- # - Monophonic only (one note at a time, no harmonies or chords)
199
- # - Pitch range of <span class="tooltip">**D4 to D5**<span class="tooltiptext">Because training data was limited outside this range, synthesis outside these pitches is very poor.</span></span>
200
- # - Lyrics (Latin) under each note, separated by syllable
201
 
202
- # ## 2. Export Your Score to MEI
203
 
204
- # When your score is complete, export it to MEI.
205
 
206
- # In MuseScore:
207
- # - Go to **File → Export**
208
- # - Choose the `.mei` file format
209
- # - Save it to your computer
210
 
211
- # ## 3. Upload Your Score to CantusSVS
212
 
213
- # In the CantusSVS web app:
214
 
215
- # - Select **MEI** mode
216
- # - Adjust the **tempo** if necessary using the provided slider
217
- # - Upload your `.mei` file
218
- # - Your score will be displayed using Verovio
219
- # - You may use the demo `.mei` file if you wish
220
 
221
- # ## 4. Edit Phonemes, Durations, and Pitches
222
 
223
- # CantusSVS automatically suggests phoneme splits for each syllable.
224
- # However, you will have the opportunity to review phonemes, durations, and pitches.
225
 
226
- # ## 5. Synthesize the Audio
227
 
228
- # When you're done:
229
 
230
- # - Click **Confirm**
231
- # - CantusSVS will create a `.ds` file which are processed through pretrained DiffSinger models
232
- # - The synthesized chant will be generated
233
 
234
- # This can take a few minutes depending on input length
235
 
236
- # ## 6. Listen and Download
237
 
238
- # After synthesis you can either listen to your chant directly in the app or download a `.wav` file to your computer.
239
 
240
- # ---
241
- # """, unsafe_allow_html=True)
242
 
243
- # st.markdown("""
244
- # <script>
245
- # const tooltipSpan = window.parent.document.querySelector('span[style*="border-bottom: 1px dotted black"]');
246
- # if (tooltipSpan) {
247
- # tooltipSpan.addEventListener('mouseover', () => {
248
- # tooltipSpan.children[0].style.visibility = 'visible';
249
- # tooltipSpan.children[0].style.opacity = 1;
250
- # });
251
- # tooltipSpan.addEventListener('mouseout', () => {
252
- # tooltipSpan.children[0].style.visibility = 'hidden';
253
- # tooltipSpan.children[0].style.opacity = 0;
254
- # });
255
- # }
256
- # </script>
257
- # """, unsafe_allow_html=True)
258
 
259
  filetype = st.selectbox("Select file type:", ["MEI", "DS"])
260
 
@@ -301,15 +303,16 @@ if filetype == "MEI":
301
  for note in st.session_state.original_raw_notes:
302
  syllable_text = note["lyric"]
303
  pitch = note["pitch"]
304
- phonemes = list(syllable_text) if syllable_text else ["a"]
305
- syllable = []
306
- for ph in phonemes:
307
- syllable.append({
308
- "phoneme": ph if ph in permitted_phonemes else "a",
309
- "duration": max(0.05, (note["duration"] / len(phonemes)) * (60/tempo)),
310
- "pitch": pitch if pitch in allowed_pitches else "D4"
311
- })
312
- syllable_groups.append({"syllable": syllable_text, "phonemes": syllable})
 
313
 
314
  if "edited_syllables" not in st.session_state:
315
  st.session_state.edited_syllables = syllable_groups
 
10
  import zipfile
11
  import streamlit.components.v1 as components
12
  from pathlib import Path
13
+ from webapp.services.defaults.default_splitter import split_syllable
14
+
15
 
16
  def patch_config_yaml_files():
17
  root = "/tmp/cantussvs_v1"
 
176
  # Pitch list D4-D5
177
  allowed_pitches = ["D4", "D#4", "E4", "F4", "F#4", "G4", "G#4", "A4", "A#4", "B4", "C5", "C#5", "D5"]
178
 
179
+ # Title
180
+ st.title("CantusSVS: Latin Singing Voice Synthesis")
181
 
182
+ st.markdown("""
183
+ # About CantusSVS
184
 
185
+ <p>CantusSVS is a web-based Singing Voice Synthesis (SVS) system designed for composers and musicians to synthesize Latin chant audio from a custom musical score.
186
+ Built on top of the DiffSinger AI model, CantusSVS enables detailed, precise control over melody, rhythm, phonemes, and timing without any programming knowledge required.</p>
187
 
188
+ <p>Designed by Liam Pond as the final project for MUS6329X: Projet en informatique musicale (Prof. Dominic Thibault) at the Université de Montréal.
189
+ You can view this project's GitHub repository [here](https://github.com/liampond/CantusSVS).</p>
190
 
191
+ ---
192
 
193
+ # How to Use CantusSVS
194
 
195
+ ## 1. Compose Your Music
196
 
197
+ Compose the chant you want to synthesize using the notation software of your choice. [MuseScore 4](https://musescore.org/en/download) is recommended.
198
+ The chant must adhere to the following conditions:
199
 
200
+ - Monophonic only (one note at a time, no harmonies or chords)
201
+ - Pitch range of <span class="tooltip">**D4 to D5**<span class="tooltiptext">Because training data was limited outside this range, synthesis outside these pitches is very poor.</span></span>
202
+ - Lyrics (Latin) under each note, separated by syllable
203
 
204
+ ## 2. Export Your Score to MEI
205
 
206
+ When your score is complete, export it to MEI.
207
 
208
+ In MuseScore:
209
+ - Go to **File → Export**
210
+ - Choose the `.mei` file format
211
+ - Save it to your computer
212
 
213
+ ## 3. Upload Your Score to CantusSVS
214
 
215
+ In the CantusSVS web app:
216
 
217
+ - Select **MEI** mode
218
+ - Adjust the **tempo** if necessary using the provided slider
219
+ - Upload your `.mei` file
220
+ - Your score will be displayed using Verovio
221
+ - You may use the demo `.mei` file if you wish
222
 
223
+ ## 4. Edit Phonemes, Durations, and Pitches
224
 
225
+ CantusSVS automatically suggests phoneme splits for each syllable.
226
+ However, you will have the opportunity to review phonemes, durations, and pitches.
227
 
228
+ ## 5. Synthesize the Audio
229
 
230
+ When you're done:
231
 
232
+ - Click **Confirm**
233
+ - CantusSVS will create a `.ds` file which are processed through pretrained DiffSinger models
234
+ - The synthesized chant will be generated
235
 
236
+ This can take a few minutes depending on input length
237
 
238
+ ## 6. Listen and Download
239
 
240
+ After synthesis you can either listen to your chant directly in the app or download a `.wav` file to your computer.
241
 
242
+ ---
243
+ """, unsafe_allow_html=True)
244
 
245
+ st.markdown("""
246
+ <script>
247
+ const tooltipSpan = window.parent.document.querySelector('span[style*="border-bottom: 1px dotted black"]');
248
+ if (tooltipSpan) {
249
+ tooltipSpan.addEventListener('mouseover', () => {
250
+ tooltipSpan.children[0].style.visibility = 'visible';
251
+ tooltipSpan.children[0].style.opacity = 1;
252
+ });
253
+ tooltipSpan.addEventListener('mouseout', () => {
254
+ tooltipSpan.children[0].style.visibility = 'hidden';
255
+ tooltipSpan.children[0].style.opacity = 0;
256
+ });
257
+ }
258
+ </script>
259
+ """, unsafe_allow_html=True)
260
 
261
  filetype = st.selectbox("Select file type:", ["MEI", "DS"])
262
 
 
303
  for note in st.session_state.original_raw_notes:
304
  syllable_text = note["lyric"]
305
  pitch = note["pitch"]
306
+ syllable = split_syllable(
307
+ syllable=syllable_text,
308
+ note_duration=note["duration"],
309
+ tempo=tempo,
310
+ pitch=pitch
311
+ )
312
+ syllable_groups.append({
313
+ "syllable": syllable_text,
314
+ "phonemes": syllable
315
+ })
316
 
317
  if "edited_syllables" not in st.session_state:
318
  st.session_state.edited_syllables = syllable_groups
webapp/services/defaults/default_splitter.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # webapp/services/defaults/default_splitter.py
2
+
3
from webapp.services.phonemes.phoneme_dict import PHONEMES

# Set built once at import time so that every membership test in this
# module is a hash lookup instead of a scan over PHONEMES.
PHONEME_SET = set(PHONEMES)
14
+
15
def _apply_brightness_overrides(syllable: str, phonemes: list[str]) -> list[str]:
    """Return *phonemes* with syllable-specific vowel overrides applied.

    The only rule currently implemented: when ``syllable`` is exactly
    ``"ecce"`` and the last phoneme is one of ``"e"``, ``"eh"`` or ``"ae"``,
    that last phoneme is replaced by ``"ay"``.

    Args:
        syllable: The syllable text the phonemes were derived from. It is
            compared verbatim, so callers should normalise case first.
        phonemes: Phoneme symbols for the syllable; may be empty.

    Returns:
        A new list with the override applied. The input list is left
        untouched so callers can keep using their original sequence.
    """
    # Work on a copy: mutating the argument would silently change the
    # caller's list as a side effect of what looks like a pure function.
    adjusted = list(phonemes)
    if syllable == "ecce" and adjusted and adjusted[-1] in {"e", "eh", "ae"}:
        adjusted[-1] = "ay"
    return adjusted
20
+
21
def latin_phoneme_split(syllable: str) -> list[str]:
    """Break a syllable into phoneme symbols drawn from ``PHONEME_SET``.

    The syllable is lower-cased first. If every character is itself a known
    phoneme, the result is simply one phoneme per character. Otherwise the
    text is scanned left to right, preferring a two-character phoneme at
    each position, then a one-character phoneme, and finally substituting
    ``"a"`` for any character that matches nothing.

    The brightness overrides are applied to the result in both cases.

    Args:
        syllable: Syllable text to split.

    Returns:
        The list of phoneme symbols; empty when ``syllable`` is empty.
    """
    text = syllable.lower()

    # Fast path: single-letter phonemes take precedence whenever the whole
    # syllable can be spelled with them.
    if all(ch in PHONEME_SET for ch in text):
        return _apply_brightness_overrides(text, list(text))

    # Slow path: greedy scan, two characters before one.
    pieces = []
    pos = 0
    end = len(text)
    while pos < end:
        pair = text[pos:pos + 2]
        if len(pair) == 2 and pair in PHONEME_SET:
            pieces.append(pair)
            pos += 2
            continue
        single = text[pos]
        # Unknown characters are replaced by the default vowel "a".
        pieces.append(single if single in PHONEME_SET else "a")
        pos += 1

    return _apply_brightness_overrides(text, pieces)
53
+
54
+
55
def split_syllable(syllable: str, note_duration: float, tempo: float, pitch: str) -> list[dict]:
    """Build the default phoneme entries for one sung note.

    The syllable is split into phonemes with ``latin_phoneme_split`` and the
    note's duration is shared equally between them.

    Args:
        syllable: Lyric text attached to the note. An empty or ``None``
            lyric is treated as the single phoneme ``"a"``.
        note_duration: Length of the note; it is scaled by ``60 / tempo``
            (presumably beats converted to seconds -- confirm with caller).
        tempo: Tempo used for the scaling above. Must be non-zero,
            otherwise the division raises ``ZeroDivisionError``.
        pitch: Pitch label copied unchanged into every phoneme entry.

    Returns:
        One dict per phoneme with the keys ``"phoneme"``, ``"duration"``
        and ``"pitch"``. Each duration is at least ``0.05``.
    """
    # Notes without a lyric must still yield one phoneme: an empty list
    # would make the per-phoneme division below raise ZeroDivisionError,
    # and a None lyric cannot be lower-cased by the splitter.
    phonemes = latin_phoneme_split(syllable) if syllable else []
    if not phonemes:
        phonemes = ["a"]

    duration_per = max(0.05, (note_duration / len(phonemes)) * (60 / tempo))
    return [
        {
            # Defensive: anything the splitter let through that is not a
            # known phoneme is replaced by the default vowel.
            "phoneme": ph if ph in PHONEME_SET else "a",
            "duration": duration_per,
            "pitch": pitch,
        }
        for ph in phonemes
    ]