Spaces:
Sleeping
Sleeping
Liam Pond
committed on
Commit
·
9062c1f
1
Parent(s):
c79c7f8
smarter phonemizer
Browse files- app.py +69 -66
- webapp/services/defaults/default_splitter.py +65 -0
app.py
CHANGED
|
@@ -10,6 +10,8 @@ import requests
|
|
| 10 |
import zipfile
|
| 11 |
import streamlit.components.v1 as components
|
| 12 |
from pathlib import Path
|
|
|
|
|
|
|
| 13 |
|
| 14 |
def patch_config_yaml_files():
|
| 15 |
root = "/tmp/cantussvs_v1"
|
|
@@ -174,87 +176,87 @@ full_phoneme_list_display = [phoneme_display_map.get(p, p) for p in permitted_ph
|
|
| 174 |
# Pitch list D4-D5
|
| 175 |
allowed_pitches = ["D4", "D#4", "E4", "F4", "F#4", "G4", "G#4", "A4", "A#4", "B4", "C5", "C#5", "D5"]
|
| 176 |
|
| 177 |
-
#
|
| 178 |
-
|
| 179 |
|
| 180 |
-
|
| 181 |
-
#
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
|
| 186 |
-
|
| 187 |
-
|
| 188 |
|
| 189 |
-
|
| 190 |
|
| 191 |
-
#
|
| 192 |
|
| 193 |
-
|
| 194 |
|
| 195 |
-
|
| 196 |
-
|
| 197 |
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
|
| 202 |
-
|
| 203 |
|
| 204 |
-
|
| 205 |
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
|
| 211 |
-
|
| 212 |
|
| 213 |
-
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
|
| 221 |
-
|
| 222 |
|
| 223 |
-
|
| 224 |
-
|
| 225 |
|
| 226 |
-
|
| 227 |
|
| 228 |
-
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
|
| 234 |
-
|
| 235 |
|
| 236 |
-
|
| 237 |
|
| 238 |
-
|
| 239 |
|
| 240 |
-
|
| 241 |
-
|
| 242 |
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
|
| 259 |
filetype = st.selectbox("Select file type:", ["MEI", "DS"])
|
| 260 |
|
|
@@ -301,15 +303,16 @@ if filetype == "MEI":
|
|
| 301 |
for note in st.session_state.original_raw_notes:
|
| 302 |
syllable_text = note["lyric"]
|
| 303 |
pitch = note["pitch"]
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
|
|
|
| 313 |
|
| 314 |
if "edited_syllables" not in st.session_state:
|
| 315 |
st.session_state.edited_syllables = syllable_groups
|
|
|
|
| 10 |
import zipfile
|
| 11 |
import streamlit.components.v1 as components
|
| 12 |
from pathlib import Path
|
| 13 |
+
from webapp.services.defaults.default_splitter import split_syllable
|
| 14 |
+
|
| 15 |
|
| 16 |
def patch_config_yaml_files():
|
| 17 |
root = "/tmp/cantussvs_v1"
|
|
|
|
| 176 |
# Pitch list D4-D5
|
| 177 |
allowed_pitches = ["D4", "D#4", "E4", "F4", "F#4", "G4", "G#4", "A4", "A#4", "B4", "C5", "C#5", "D5"]
|
| 178 |
|
| 179 |
+
# Title
|
| 180 |
+
st.title("CantusSVS: Latin Singing Voice Synthesis")
|
| 181 |
|
| 182 |
+
st.markdown("""
|
| 183 |
+
# About CantusSVS
|
| 184 |
|
| 185 |
+
<p>CantusSVS is a web-based Singing Voice Synthesis (SVS) system designed for composers and musicians to synthesize Latin chant audio from a custom musical score.
|
| 186 |
+
Built on top of the DiffSinger AI model, CantusSVS enables detailed, precise control over melody, rhythm, phonemes, and timing without any programming knowledge required.</p>
|
| 187 |
|
| 188 |
+
<p>Designed by Liam Pond as the final project for MUS6329X: Projet en informatique musicale (Prof. Dominic Thibault) at the Université de Montréal.
|
| 189 |
+
You can view this project's GitHub repository [here](https://github.com/liampond/CantusSVS).</p>
|
| 190 |
|
| 191 |
+
---
|
| 192 |
|
| 193 |
+
# How to Use CantusSVS
|
| 194 |
|
| 195 |
+
## 1. Compose Your Music
|
| 196 |
|
| 197 |
+
Compose the chant you want to synthesize using the notation software of your choice. [MuseScore 4](https://musescore.org/en/download) is recommended.
|
| 198 |
+
The chant must adhere to the following conditions:
|
| 199 |
|
| 200 |
+
- Monophonic only (one note at a time, no harmonies or chords)
|
| 201 |
+
- Pitch range of <span class="tooltip">**D4 to D5**<span class="tooltiptext">Because training data was limited outside this range, synthesis outside these pitches is very poor.</span></span>
|
| 202 |
+
- Lyrics (Latin) under each note, separated by syllable
|
| 203 |
|
| 204 |
+
## 2. Export Your Score to MEI
|
| 205 |
|
| 206 |
+
When your score is complete, export it to MEI.
|
| 207 |
|
| 208 |
+
In MuseScore:
|
| 209 |
+
- Go to **File → Export**
|
| 210 |
+
- Choose the `.mei` file format
|
| 211 |
+
- Save it to your computer
|
| 212 |
|
| 213 |
+
## 3. Upload Your Score to CantusSVS
|
| 214 |
|
| 215 |
+
In the CantusSVS web app:
|
| 216 |
|
| 217 |
+
- Select **MEI** mode
|
| 218 |
+
- Adjust the **tempo** if necessary using the provided slider
|
| 219 |
+
- Upload your `.mei` file
|
| 220 |
+
- Your score will be displayed using Verovio
|
| 221 |
+
- You may use the demo `.mei` file if you wish
|
| 222 |
|
| 223 |
+
## 4. Edit Phonemes, Durations, and Pitches
|
| 224 |
|
| 225 |
+
CantusSVS automatically suggests phoneme splits for each syllable.
|
| 226 |
+
However, you will have the opportunity to review phonemes, durations, and pitches.
|
| 227 |
|
| 228 |
+
## 5. Synthesize the Audio
|
| 229 |
|
| 230 |
+
When you're done:
|
| 231 |
|
| 232 |
+
- Click **Confirm**
|
| 233 |
+
- CantusSVS will create a `.ds` file which are processed through pretrained DiffSinger models
|
| 234 |
+
- The synthesized chant will be generated
|
| 235 |
|
| 236 |
+
This can take a few minutes depending on input length
|
| 237 |
|
| 238 |
+
## 6. Listen and Download
|
| 239 |
|
| 240 |
+
After synthesis you can either listen to your chant directly in the app or download a `.wav` file to your computer.
|
| 241 |
|
| 242 |
+
---
|
| 243 |
+
""", unsafe_allow_html=True)
|
| 244 |
|
| 245 |
+
st.markdown("""
|
| 246 |
+
<script>
|
| 247 |
+
const tooltipSpan = window.parent.document.querySelector('span[style*="border-bottom: 1px dotted black"]');
|
| 248 |
+
if (tooltipSpan) {
|
| 249 |
+
tooltipSpan.addEventListener('mouseover', () => {
|
| 250 |
+
tooltipSpan.children[0].style.visibility = 'visible';
|
| 251 |
+
tooltipSpan.children[0].style.opacity = 1;
|
| 252 |
+
});
|
| 253 |
+
tooltipSpan.addEventListener('mouseout', () => {
|
| 254 |
+
tooltipSpan.children[0].style.visibility = 'hidden';
|
| 255 |
+
tooltipSpan.children[0].style.opacity = 0;
|
| 256 |
+
});
|
| 257 |
+
}
|
| 258 |
+
</script>
|
| 259 |
+
""", unsafe_allow_html=True)
|
| 260 |
|
| 261 |
filetype = st.selectbox("Select file type:", ["MEI", "DS"])
|
| 262 |
|
|
|
|
| 303 |
for note in st.session_state.original_raw_notes:
|
| 304 |
syllable_text = note["lyric"]
|
| 305 |
pitch = note["pitch"]
|
| 306 |
+
syllable = split_syllable(
|
| 307 |
+
syllable=syllable_text,
|
| 308 |
+
note_duration=note["duration"],
|
| 309 |
+
tempo=tempo,
|
| 310 |
+
pitch=pitch
|
| 311 |
+
)
|
| 312 |
+
syllable_groups.append({
|
| 313 |
+
"syllable": syllable_text,
|
| 314 |
+
"phonemes": syllable
|
| 315 |
+
})
|
| 316 |
|
| 317 |
if "edited_syllables" not in st.session_state:
|
| 318 |
st.session_state.edited_syllables = syllable_groups
|
webapp/services/defaults/default_splitter.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# webapp/services/defaults/default_splitter.py

from webapp.services.phonemes.phoneme_dict import PHONEMES

# Treat the list as a set for fast lookup.
# Built once at import time; the import and this assignment were previously
# repeated several times with identical effect.
PHONEME_SET = set(PHONEMES)
|
| 14 |
+
|
| 15 |
+
def _apply_brightness_overrides(syllable: str, phonemes: list[str]) -> list[str]:
|
| 16 |
+
# Rule: if the syllable is 'ecce', override the final vowel to 'ay'
|
| 17 |
+
if syllable == "ecce" and phonemes and phonemes[-1] in {"e", "eh", "ae"}:
|
| 18 |
+
phonemes[-1] = "ay"
|
| 19 |
+
return phonemes
|
| 20 |
+
|
| 21 |
+
def latin_phoneme_split(syllable: str) -> list[str]:
    """Split a syllable into phoneme symbols.

    The text is lower-cased first. If every character is individually a
    member of ``PHONEME_SET``, the characters are used one per phoneme and
    two-letter phonemes are not considered. Otherwise the text is scanned
    left to right: a two-character match in ``PHONEME_SET`` is preferred,
    then a single-character match, and any character matching neither is
    replaced by ``"a"``. The result is passed through
    ``_apply_brightness_overrides`` before being returned.

    Args:
        syllable: The syllable text to split; may be empty.

    Returns:
        The list of phoneme symbols (empty for an empty syllable).
    """
    text = syllable.lower()

    # Fast path: every character is a phoneme on its own.
    if all(char in PHONEME_SET for char in text):
        return _apply_brightness_overrides(text, list(text))

    # Greedy scan: two-letter phoneme first, then one letter, else fallback.
    tokens = []
    pos = 0
    end = len(text)
    while pos < end:
        pair = text[pos:pos + 2]
        if len(pair) == 2 and pair in PHONEME_SET:
            tokens.append(pair)
            pos += 2
            continue
        single = text[pos]
        # Unknown characters fall back to "a".
        tokens.append(single if single in PHONEME_SET else "a")
        pos += 1

    return _apply_brightness_overrides(text, tokens)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def split_syllable(syllable: str, note_duration: float, tempo: float, pitch: str) -> list[dict]:
    """Build default phoneme entries for one note.

    The syllable is split with ``latin_phoneme_split`` and the note's length
    is shared equally between the resulting phonemes. Each phoneme gets
    ``(note_duration / phoneme_count) * (60 / tempo)``, floored at ``0.05``.

    Args:
        syllable: Lyric text attached to the note.
        note_duration: Length of the note.
            NOTE(review): presumably in beats, given the ``60 / tempo``
            conversion; confirm against the caller.
        tempo: Tempo used to convert the duration.
            NOTE(review): presumably beats per minute; confirm. Must be
            non-zero, otherwise ``ZeroDivisionError`` is raised.
        pitch: Pitch label copied unchanged onto every phoneme entry.

    Returns:
        One dict per phoneme with keys ``"phoneme"``, ``"duration"`` and
        ``"pitch"``. Returns an empty list when the syllable yields no
        phonemes (e.g. an empty lyric).
    """
    phonemes = latin_phoneme_split(syllable)
    if not phonemes:
        # An empty lyric produces no phonemes; return early instead of
        # dividing by zero below.
        return []

    duration_per = max(0.05, (note_duration / len(phonemes)) * (60 / tempo))
    return [
        {
            # Anything outside the permitted phoneme set (including
            # override symbols it may not contain) falls back to "a".
            "phoneme": ph if ph in PHONEME_SET else "a",
            "duration": duration_per,
            "pitch": pitch,
        }
        for ph in phonemes
    ]
|