Spaces:

liampond
/

CantusSVS-hf

Sleeping

App Files Files Community

Liam Pond committed on May 1

Commit

9062c1f

1 Parent(s): c79c7f8

smarter phonemizer

Browse files

Files changed (2) hide show

app.py +69 -66
webapp/services/defaults/default_splitter.py +65 -0

app.py CHANGED Viewed

@@ -10,6 +10,8 @@ import requests
 import zipfile
 import streamlit.components.v1 as components
 from pathlib import Path
 def patch_config_yaml_files():
     root = "/tmp/cantussvs_v1"
@@ -174,87 +176,87 @@ full_phoneme_list_display = [phoneme_display_map.get(p, p) for p in permitted_ph
 # Pitch list D4-D5
 allowed_pitches = ["D4", "D#4", "E4", "F4", "F#4", "G4", "G#4", "A4", "A#4", "B4", "C5", "C#5", "D5"]
-# # Title
-# st.title("CantusSVS: Latin Singing Voice Synthesis")
-# st.markdown("""
-# # About CantusSVS
-# <p>CantusSVS is a web-based Singing Voice Synthesis (SVS) system designed for composers and musicians to synthesize Latin chant audio from a custom musical score.
-# Built on top of the DiffSinger AI model, CantusSVS enables detailed, precise control over melody, rhythm, phonemes, and timing without any programming knowledge required.</p>
-# <p>Designed by Liam Pond as the final project for MUS6329X: Projet en informatique musicale (Prof. Dominic Thibault) at the Université de Montréal.
-# You can view this project's GitHub repository [here](https://github.com/liampond/CantusSVS).</p>
-# ---
-# # How to Use CantusSVS
-# ## 1. Compose Your Music
-# Compose the chant you want to synthesize using the notation software of your choice. [MuseScore 4](https://musescore.org/en/download) is recommended.
-# The chant must adhere to the following conditions:
-# - Monophonic only (one note at a time, no harmonies or chords)
-# - Pitch range of <span class="tooltip">**D4 to D5**<span class="tooltiptext">Because training data was limited outside this range, synthesis outside these pitches is very poor.</span></span>
-# - Lyrics (Latin) under each note, separated by syllable
-# ## 2. Export Your Score to MEI
-# When your score is complete, export it to MEI.
-# In MuseScore:
-# - Go to **File → Export**
-# - Choose the `.mei` file format
-# - Save it to your computer
-# ## 3. Upload Your Score to CantusSVS
-# In the CantusSVS web app:
-# - Select **MEI** mode
-# - Adjust the **tempo** if necessary using the provided slider
-# - Upload your `.mei` file
-# - Your score will be displayed using Verovio
-# - You may use the demo `.mei` file if you wish
-# ## 4. Edit Phonemes, Durations, and Pitches
-# CantusSVS automatically suggests phoneme splits for each syllable.
-# However, you will have the opportunity to review phonemes, durations, and pitches.
-# ## 5. Synthesize the Audio
-# When you're done:
-# - Click **Confirm**
-# - CantusSVS will create a `.ds` file which are processed through pretrained DiffSinger models
-# - The synthesized chant will be generated
-# This can take a few minutes depending on input length
-# ## 6. Listen and Download
-# After synthesis you can either listen to your chant directly in the app or download a `.wav` file to your computer.
-# ---
-# """, unsafe_allow_html=True)
-# st.markdown("""
-# <script>
-# const tooltipSpan = window.parent.document.querySelector('span[style*="border-bottom: 1px dotted black"]');
-# if (tooltipSpan) {
-#     tooltipSpan.addEventListener('mouseover', () => {
-#         tooltipSpan.children[0].style.visibility = 'visible';
-#         tooltipSpan.children[0].style.opacity = 1;
-#     });
-#     tooltipSpan.addEventListener('mouseout', () => {
-#         tooltipSpan.children[0].style.visibility = 'hidden';
-#         tooltipSpan.children[0].style.opacity = 0;
-#     });
-# }
-# </script>
-# """, unsafe_allow_html=True)
 filetype = st.selectbox("Select file type:", ["MEI", "DS"])
@@ -301,15 +303,16 @@ if filetype == "MEI":
     for note in st.session_state.original_raw_notes:
         syllable_text = note["lyric"]
         pitch = note["pitch"]
-        phonemes = list(syllable_text) if syllable_text else ["a"]
-        syllable = []
-        for ph in phonemes:
-            syllable.append({
-                "phoneme": ph if ph in permitted_phonemes else "a",
-                "duration": max(0.05, (note["duration"] / len(phonemes)) * (60/tempo)),
-                "pitch": pitch if pitch in allowed_pitches else "D4"
-            })
-        syllable_groups.append({"syllable": syllable_text, "phonemes": syllable})
     if "edited_syllables" not in st.session_state:
         st.session_state.edited_syllables = syllable_groups

 import zipfile
 import streamlit.components.v1 as components
 from pathlib import Path
+from webapp.services.defaults.default_splitter import split_syllable
 def patch_config_yaml_files():
     root = "/tmp/cantussvs_v1"
 # Pitch list D4-D5
 allowed_pitches = ["D4", "D#4", "E4", "F4", "F#4", "G4", "G#4", "A4", "A#4", "B4", "C5", "C#5", "D5"]
+# Title
+st.title("CantusSVS: Latin Singing Voice Synthesis")
+st.markdown("""
+# About CantusSVS
+<p>CantusSVS is a web-based Singing Voice Synthesis (SVS) system designed for composers and musicians to synthesize Latin chant audio from a custom musical score.
+Built on top of the DiffSinger AI model, CantusSVS enables detailed, precise control over melody, rhythm, phonemes, and timing without any programming knowledge required.</p>
+<p>Designed by Liam Pond as the final project for MUS6329X: Projet en informatique musicale (Prof. Dominic Thibault) at the Université de Montréal.
+You can view this project's GitHub repository [here](https://github.com/liampond/CantusSVS).</p>
+---
+# How to Use CantusSVS
+## 1. Compose Your Music
+Compose the chant you want to synthesize using the notation software of your choice. [MuseScore 4](https://musescore.org/en/download) is recommended.
+The chant must adhere to the following conditions:
+- Monophonic only (one note at a time, no harmonies or chords)
+- Pitch range of <span class="tooltip">**D4 to D5**<span class="tooltiptext">Because training data was limited outside this range, synthesis outside these pitches is very poor.</span></span>
+- Lyrics (Latin) under each note, separated by syllable
+## 2. Export Your Score to MEI
+When your score is complete, export it to MEI.
+In MuseScore:
+- Go to **File → Export**
+- Choose the `.mei` file format
+- Save it to your computer
+## 3. Upload Your Score to CantusSVS
+In the CantusSVS web app:
+- Select **MEI** mode
+- Adjust the **tempo** if necessary using the provided slider
+- Upload your `.mei` file
+- Your score will be displayed using Verovio
+- You may use the demo `.mei` file if you wish
+## 4. Edit Phonemes, Durations, and Pitches
+CantusSVS automatically suggests phoneme splits for each syllable.
+However, you will have the opportunity to review phonemes, durations, and pitches.
+## 5. Synthesize the Audio
+When you're done:
+- Click **Confirm**
+- CantusSVS will create a `.ds` file which are processed through pretrained DiffSinger models
+- The synthesized chant will be generated
+This can take a few minutes depending on input length
+## 6. Listen and Download
+After synthesis you can either listen to your chant directly in the app or download a `.wav` file to your computer.
+---
+""", unsafe_allow_html=True)
+st.markdown("""
+<script>
+const tooltipSpan = window.parent.document.querySelector('span[style*="border-bottom: 1px dotted black"]');
+if (tooltipSpan) {
+    tooltipSpan.addEventListener('mouseover', () => {
+        tooltipSpan.children[0].style.visibility = 'visible';
+        tooltipSpan.children[0].style.opacity = 1;
+    });
+    tooltipSpan.addEventListener('mouseout', () => {
+        tooltipSpan.children[0].style.visibility = 'hidden';
+        tooltipSpan.children[0].style.opacity = 0;
+    });
+}
+</script>
+""", unsafe_allow_html=True)
 filetype = st.selectbox("Select file type:", ["MEI", "DS"])
     for note in st.session_state.original_raw_notes:
         syllable_text = note["lyric"]
         pitch = note["pitch"]
+        syllable = split_syllable(
+            syllable=syllable_text,
+            note_duration=note["duration"],
+            tempo=tempo,
+            pitch=pitch
+        )
+        syllable_groups.append({
+            "syllable": syllable_text,
+            "phonemes": syllable
+    })
     if "edited_syllables" not in st.session_state:
         st.session_state.edited_syllables = syllable_groups

webapp/services/defaults/default_splitter.py ADDED Viewed

	@@ -0,0 +1,65 @@

+# webapp/services/defaults/default_splitter.py
from webapp.services.phonemes.phoneme_dict import PHONEMES

# Treat the phoneme inventory as a set for fast membership lookup.
# Built once at import time; every function in this module reads it.
PHONEME_SET = set(PHONEMES)
+def _apply_brightness_overrides(syllable: str, phonemes: list[str]) -> list[str]:
+    # Rule: if the syllable is 'ecce', override the final vowel to 'ay'
+    if syllable == "ecce" and phonemes and phonemes[-1] in {"e", "eh", "ae"}:
+        phonemes[-1] = "ay"
+    return phonemes
def latin_phoneme_split(syllable: str) -> list[str]:
    """Split a syllable into phonemes drawn from ``PHONEME_SET``.

    The syllable is lower-cased, then split in two attempts:

    1. If every character is itself a member of ``PHONEME_SET``, the
       characters are used as the phonemes.
    2. Otherwise the text is scanned left to right, preferring a two-character
       phoneme, then a one-character phoneme, and substituting ``"a"`` for any
       character that matches neither.

    The result is passed through ``_apply_brightness_overrides`` before being
    returned.

    Args:
        syllable: Lyric text for a single syllable. Empty or ``None`` is
            accepted.

    Returns:
        A non-empty list of phoneme strings; ``["a"]`` when there is no lyric.
    """
    # A note without a lyric must still sing something. Returning [] here
    # would make callers divide the note duration by zero.
    if not syllable:
        return ["a"]

    syllable = syllable.lower()

    # First try: one-letter phonemes only.
    if all(ch in PHONEME_SET for ch in syllable):
        return _apply_brightness_overrides(syllable, list(syllable))

    # Greedy two-letter match, then one-letter, then the "a" fallback.
    result = []
    i = 0
    while i < len(syllable):
        two = syllable[i:i + 2]
        if len(two) == 2 and two in PHONEME_SET:
            result.append(two)
            i += 2
        elif syllable[i] in PHONEME_SET:
            result.append(syllable[i])
            i += 1
        else:
            result.append("a")  # unknown character: fall back to a plain vowel
            i += 1
    return _apply_brightness_overrides(syllable, result)
def split_syllable(syllable: str, note_duration: float, tempo: float, pitch: str) -> list[dict]:
    """Build the default phoneme entries for one sung note.

    The syllable is split with ``latin_phoneme_split`` and the note's length
    is shared equally between the resulting phonemes.

    Args:
        syllable: Lyric text under the note. Empty or ``None`` yields a single
            ``"a"`` phoneme.
        note_duration: Length of the note; presumably in beats, given the
            ``60 / tempo`` scaling. TODO confirm against the caller.
        tempo: Tempo used to scale the duration; presumably beats per minute.
            Must be non-zero.
        pitch: Pitch name copied unchanged into every entry.
            NOTE(review): no range check is applied to it here.

    Returns:
        One dict per phoneme with the keys ``"phoneme"``, ``"duration"`` and
        ``"pitch"``. Each duration is at least ``0.05``.
    """
    # Guard against a missing lyric: an empty phoneme list would raise
    # ZeroDivisionError below, so fall back to a single "a".
    phonemes = latin_phoneme_split(syllable) if syllable else []
    if not phonemes:
        phonemes = ["a"]

    # Equal share of the note per phoneme, floored at 0.05.
    duration_per = max(0.05, (note_duration / len(phonemes)) * (60 / tempo))
    return [
        {
            "phoneme": ph if ph in PHONEME_SET else "a",
            "duration": duration_per,
            "pitch": pitch,
        }
        for ph in phonemes
    ]