Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| st.set_page_config(page_title="CantusSVS", layout="wide") | |
| import os | |
| import yaml | |
| import shutil | |
| import traceback | |
| import json | |
| import requests | |
| import zipfile | |
| import streamlit.components.v1 as components | |
| from pathlib import Path | |
| from webapp.services.defaults.default_splitter import split_syllable | |
def patch_config_yaml_files(root="/tmp/cantussvs_v1"):
    """Rewrite relative asset paths in every ``config.yaml`` under the checkpoints tree.

    Walks ``<root>/checkpoints`` and, for each file named ``config.yaml``,
    replaces top-level string values that start with ``checkpoints/`` or
    ``data/`` by the corresponding absolute path below ``root``. Nested values
    are left untouched. A file is only rewritten when at least one value
    changed, so calling this repeatedly is harmless (patched values no longer
    start with a relative prefix).

    Args:
        root: Directory that contains the extracted ``checkpoints`` and
            ``data`` folders. Defaults to the location used by the app.

    Returns:
        None. Progress and failures are reported with ``print``; a failure on
        one file does not stop the remaining files from being processed.
    """
    checkpoints_root = os.path.join(root, "checkpoints")
    data_root = os.path.join(root, "data")
    # Relative prefix -> absolute directory that replaces it.
    prefix_targets = (
        ("checkpoints/", checkpoints_root),
        ("data/", data_root),
    )

    for dirpath, _, filenames in os.walk(checkpoints_root):
        if "config.yaml" not in filenames:
            continue
        full_path = os.path.join(dirpath, "config.yaml")
        try:
            # YAML streams are UTF-8; do not depend on the process locale.
            with open(full_path, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f)
            if not isinstance(config, dict):
                continue

            modified = False
            # Iterate over a snapshot because values are reassigned below.
            for key, value in list(config.items()):
                if not isinstance(value, str):
                    continue
                for prefix, target_root in prefix_targets:
                    if value.startswith(prefix):
                        config[key] = os.path.join(target_root, value[len(prefix):])
                        modified = True
                        break

            if modified:
                with open(full_path, "w", encoding="utf-8") as f:
                    yaml.dump(config, f)
                print(f"✅ Patched paths in {full_path}")
        except Exception as e:
            # Best effort: report and carry on with the next config file.
            print(f"❌ Failed to patch {full_path}: {e}")
# Disable Streamlit file watcher
# NOTE(review): this is assigned after `import streamlit` and
# `st.set_page_config(...)` at the top of the file have already run — confirm
# the setting is still picked up when set this late.
os.environ['STREAMLIT_SERVER_FILE_WATCHER_TYPE'] = 'none'
# Ensure project root is on the import path
# PROJECT_ROOT is the directory containing this file; the webapp/ folders
# defined further down are resolved relative to it.
PROJECT_ROOT = Path(__file__).resolve().parent
import sys
# NOTE(review): `webapp.services.defaults.default_splitter` is imported above
# this line, i.e. before the path is extended — presumably the script
# directory is already importable; verify.
sys.path.insert(0, str(PROJECT_ROOT))
| from webapp.services.parsing.mei_parser import parse_mei_for_editor | |
| from webapp.services.parsing.ds_builder import build_ds_from_notes | |
| from webapp.services.parsing.ds_validator import validate_ds | |
| from webapp.services.phonemes.phoneme_dict import PHONEMES as permitted_phonemes | |
| from inference.pipeline import run_inference | |
def safe_symlink(src, dst):
    """Create the symlink ``dst -> src`` unless something already occupies ``dst``.

    Nothing is ever replaced: an existing link (whether or not it points at
    ``src``) and an existing regular file or directory are both left alone.
    Every outcome, including any exception, is reported with ``print``; the
    function never raises and always returns ``None``.

    Args:
        src: Target path the link should point to.
        dst: Location of the link to create.
    """
    try:
        if not os.path.islink(dst):
            if os.path.exists(dst):
                # A real file or directory is in the way; do not touch it.
                print(f"❗ Cannot create symlink, path exists and is not a symlink: {dst}")
            else:
                os.symlink(src, dst)
                print(f"✅ Created symlink: {dst} → {src}")
            return

        # dst is already a link: only report whether it targets src.
        current_target = os.readlink(dst)
        if current_target == src:
            print(f"✅ Symlink already correct: {dst} → {src}")
        else:
            print(f"⚠️ Symlink exists but points elsewhere. Skipping: {dst}")
    except Exception as e:
        print(f"❗ Failed to create symlink {dst} -> {src}: {e}")
# Directories
# Absolute locations of the downloaded model/data bundle.
HF_CHECKPOINTS_DIR = "/tmp/cantussvs_v1/checkpoints"
HF_DATA_DIR = "/tmp/cantussvs_v1/data"

# Working folders of the web app, all resolved against PROJECT_ROOT.
DEMO_FILES = PROJECT_ROOT.joinpath("webapp/demo_files")
UPLOAD_MEI_DIR = PROJECT_ROOT.joinpath("webapp/uploaded_mei")
UPLOAD_DS_DIR = PROJECT_ROOT.joinpath("webapp/uploaded_ds")
TMP_DS_DIR = PROJECT_ROOT.joinpath("webapp/tmp_ds")
OUTPUT_DIR = PROJECT_ROOT.joinpath("webapp/output")

# Make sure every working folder exists before anything reads or writes it.
for d in (DEMO_FILES, UPLOAD_MEI_DIR, UPLOAD_DS_DIR, TMP_DS_DIR, OUTPUT_DIR):
    d.mkdir(parents=True, exist_ok=True)
def download_and_extract_from_hf():
    """Download and unpack the CantusSVS asset bundle if it is not present yet.

    When ``/tmp/cantussvs_v1`` does not exist, the zip archive is streamed
    from Hugging Face to ``/tmp/cantussvs_v1.zip``, extracted, its
    ``config.yaml`` files are patched to absolute paths, and ``checkpoints`` /
    ``data`` symlinks are created in the current working directory. When the
    directory already exists nothing is downloaded or changed.

    Returns:
        The extraction directory, ``"/tmp/cantussvs_v1"``.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an HTTP error status.
        zipfile.BadZipFile: The downloaded archive is not a valid zip file.
        OSError: Writing or extracting the archive failed.
    """
    url = "https://huggingface.co/datasets/liampond/CantusSVS/resolve/main/cantussvs_v1.zip"
    zip_path = "/tmp/cantussvs_v1.zip"
    extract_dir = "/tmp/cantussvs_v1"

    if not os.path.exists(extract_dir):
        st.write("📦 Downloading data + model from Hugging Face...")
        # The timeout stops a stalled connection from hanging the app forever;
        # the context manager releases the connection when streaming is done.
        with requests.get(url, stream=True, timeout=(10, 60)) as r:
            # Fail here instead of saving an HTML error page as the "zip".
            r.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        st.write("📂 Extracting contents...")
        try:
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)
        except BaseException:
            # A half-extracted directory would make every later run skip the
            # download (the existence check above) and work with broken
            # assets, so remove it before propagating the error.
            shutil.rmtree(extract_dir, ignore_errors=True)
            raise

        # ✅ Only do this once, right after unzip
        patch_config_yaml_files()
        safe_symlink(os.path.join(extract_dir, "checkpoints"), "checkpoints")
        safe_symlink(os.path.join(extract_dir, "data"), "data")

    return extract_dir
# Call it once and use it globally
# base_path is the extraction directory returned by the helper.
base_path = download_and_extract_from_hf()
# NOTE(review): download_and_extract_from_hf() already calls
# patch_config_yaml_files() right after unzipping; this second, unconditional
# call walks the checkpoints tree again every time this script executes.
# Confirm it is intended as a safety net rather than left over.
patch_config_yaml_files()
st.write("✅ Loaded assets to:", base_path)
| # CSS styling | |
| # st.markdown(""" | |
| # <style> | |
| # html, body, [class*="css"] { font-size: 18px !important; } | |
| # div[data-testid="stSelectbox"] label, | |
| # div[data-testid="stNumberInput"] label, | |
| # div[data-testid="stTextInput"] label { font-size: 13px; padding-bottom: 0px; } | |
| # div[data-testid="stSlider"] label { font-size: 0px; } | |
| # div.stButton > button:first-child { | |
| # background-color: black; color: white; | |
| # font-size: 14px; padding: 4px 10px; | |
| # border-radius: 8px; | |
| # } | |
| # section[data-testid="stFileUploaderDropzone"] { padding: 2rem; } | |
| # .tooltip { | |
| # position: relative; | |
| # display: inline-block; | |
| # border-bottom: 1px dotted white; | |
| # cursor: help; | |
| # } | |
| # .tooltip .tooltiptext { | |
| # visibility: hidden; | |
| # width: 250px; | |
| # background-color: black; | |
| # color: #fff; | |
| # text-align: center; | |
| # border-radius: 6px; | |
| # padding: 6px; | |
| # position: absolute; | |
| # z-index: 1; | |
| # bottom: 125%; /* Position above */ | |
| # left: 50%; | |
| # margin-left: -125px; | |
| # opacity: 0; | |
| # transition: opacity 0.3s; | |
| # } | |
| # .tooltip:hover .tooltiptext { | |
| # visibility: visible; | |
| # opacity: 1; | |
| # } | |
| # </style> | |
| # """, unsafe_allow_html=True) | |
# Phoneme mappings
# Internal phoneme code -> label shown in the editor; codes without an entry
# are displayed unchanged.
phoneme_display_map = {"ap": "Pause", "br": "Breath"}
# Reverse lookup: label shown in the editor -> internal phoneme code.
display_to_phoneme = dict(
    zip(phoneme_display_map.values(), phoneme_display_map.keys())
)
# Options of the phoneme select boxes, in the order of the permitted list.
full_phoneme_list_display = [
    phoneme_display_map[p] if p in phoneme_display_map else p
    for p in permitted_phonemes
]

# Pitch list D4-D5
allowed_pitches = "D4 D#4 E4 F4 F#4 G4 G#4 A4 A#4 B4 C5 C#5 D5".split()
| # Title | |
| st.title("CantusSVS: Latin Singing Voice Synthesis") | |
| st.markdown(""" | |
| # About CantusSVS | |
| <p>CantusSVS is a web-based Singing Voice Synthesis (SVS) system designed for composers and musicians to synthesize Latin chant audio from a custom musical score. | |
| Built on top of the DiffSinger AI model, CantusSVS enables detailed, precise control over melody, rhythm, phonemes, and timing without any programming knowledge required.</p> | |
| <p>Designed by Liam Pond as the final project for MUS6329X: Projet en informatique musicale (Prof. Dominic Thibault) at the Université de Montréal. For more information, you can view the README.md under the 'Files' tab of this Space.</p> | |
| You can find DiffSinger in the following paper: | |
| **DiffSinger: Singing Voice Synthesis via Shallow Diffusion Mechanism** | |
| Liu, Jinglin, Chengxi Li, Yi Ren, Feiyang Chen, and Zhou Zhao. 2022. "DiffSinger: Singing Voice Synthesis via Shallow Diffusion Mechanism." In *Proceedings of the AAAI Conference on Artificial Intelligence* 36 (10): 11020–11028. [https://doi.org/10.1609/aaai.v36i10.21350](https://doi.org/10.1609/aaai.v36i10.21350). Preprint: [https://arxiv.org/abs/2105.02446](https://arxiv.org/abs/2105.02446). | |
| Model training was done using Cedar, a cluster provided by the Digital Research Alliance of Canada. To train your own model locally, follow [this tutorial](https://youtu.be/Sxt11TAflV0?feature=shared) by [tigermeat](https://www.youtube.com/@spicytigermeat). | |
| For general help training and creating a dataset, [this tutorial](https://docs.google.com/document/d/1uMsepxbdUW65PfIWL1pt2OM6ZKa5ybTTJOpZ733Ht6s/view) by [PixPrucer](https://bsky.app/profile/pixprucer.bsky.social) is an excellent guide. For help, join the [DiffSinger Discord server](https://discord.gg/DZ6fhEUfnb). | |
| The dataset used for this project was built using [*Adventus: Dominica prima adventus Domini*](https://youtu.be/ThnPySybDJs?feature=shared), the first track from [Psallentes](https://psallentes.com/)' album *Salzinnes Saints*. Psallentes is a Belgian women's chorus that specializes in Late Medieval and Renaissance music. *Salzinnes Saints* is an album of music from the [Salzinnes Antiphonal](https://www.smu.ca/academics/archives/the-salzinnes-antiphonal.html), a mid-sixteenth century choirbook with the music and text for the Liturgy of the Hours. | |
| --- | |
| # How to Use CantusSVS | |
| ## 1. Compose Your Music | |
| Compose the chant you want to synthesize using the notation software of your choice. [MuseScore 4](https://musescore.org/en/download) is recommended. | |
| The chant must adhere to the following conditions: | |
| - Monophonic only (one note at a time, no harmonies or chords) | |
| - Pitch range of <span class="tooltip">**D4 to D5**<span class="tooltiptext">Because training data was limited outside this range, synthesis outside these pitches is very poor.</span></span> | |
| - Lyrics (Latin) under each note, separated by syllable | |
| ## 2. Export Your Score to MEI | |
| When your score is complete, export it to MEI. | |
| In MuseScore: | |
| - Go to **File → Export** | |
| - Choose the `.mei` file format | |
| - Save it to your computer | |
| ## 3. Upload Your Score to CantusSVS | |
| In the CantusSVS web app: | |
| - Select **MEI** mode | |
| - Adjust the **tempo** if necessary using the provided slider | |
| - Upload your `.mei` file | |
| - Your score will be displayed using Verovio | |
| - You may use the demo `.mei` file if you wish | |
| ## 4. Edit Phonemes, Durations, and Pitches | |
| CantusSVS automatically suggests phoneme splits for each syllable. | |
| However, you will have the opportunity to review phonemes, durations, and pitches. | |
| ## 5. Synthesize the Audio | |
| When you're done: | |
| - Click **Confirm** | |
| - CantusSVS will create a `.ds` file, which is processed through pretrained DiffSinger models | |
| - The synthesized chant will be generated | |
| This can take a few minutes, depending on the input length. | |
| ## 6. Listen and Download | |
| After synthesis you can either listen to your chant directly in the app or download a `.wav` file to your computer. | |
| --- | |
| """, unsafe_allow_html=True) | |
# Injects a small script meant to toggle the tooltip text on hover.
# NOTE(review): the selector below matches a <span> with the inline style
# "border-bottom: 1px dotted black", but the tooltip span in the About
# markdown is declared with class="tooltip" and carries no inline style, so
# `tooltipSpan` looks like it will always be null. It is also unclear whether
# Streamlit executes <script> tags passed through st.markdown — confirm, or
# drive the tooltip from CSS instead.
st.markdown("""
<script>
const tooltipSpan = window.parent.document.querySelector('span[style*="border-bottom: 1px dotted black"]');
if (tooltipSpan) {
tooltipSpan.addEventListener('mouseover', () => {
tooltipSpan.children[0].style.visibility = 'visible';
tooltipSpan.children[0].style.opacity = 1;
});
tooltipSpan.addEventListener('mouseout', () => {
tooltipSpan.children[0].style.visibility = 'hidden';
tooltipSpan.children[0].style.opacity = 0;
});
}
</script>
""", unsafe_allow_html=True)
# Input mode: "MEI" runs the score editor workflow below, "DS" synthesizes an
# uploaded .ds file directly.
filetype = st.selectbox("Select file type:", ["MEI", "DS"])
def handle_exception(context_message):
    """Report the exception currently being handled and halt the script run.

    Shows a short error in the page, prints a framed traceback to the
    console, then calls ``st.stop()``. Intended to be called from inside an
    ``except`` block so that ``traceback.print_exc()`` has an active
    exception to print.

    Args:
        context_message: Short description of the step that failed; used in
            both the page message and the console output.
    """
    banner = "=" * 30
    st.error(f"{context_message}. See console.")
    print(f"\n{banner}")
    print(f"Exception during {context_message}")
    traceback.print_exc()
    print(f"{banner}\n")
    st.stop()
# `st.experimental_rerun` was superseded by `st.rerun`; use whichever this
# Streamlit version provides so the add/remove buttons keep working.
_rerun = st.rerun if hasattr(st, "rerun") else st.experimental_rerun

# Upper bound (seconds) accepted by the duration editor.
_MAX_PHONEME_DURATION = 5.0


def _safe_upload_name(uploaded_name):
    """Return only the final path component of a client-supplied filename."""
    return Path(uploaded_name).name


def _clear_editor_widget_state():
    """Forget stored values of the per-phoneme editor widgets."""
    editor_prefixes = ("phoneme_", "duration_num_", "pitch_")
    for state_key in list(st.session_state.keys()):
        if isinstance(state_key, str) and state_key.startswith(editor_prefixes):
            del st.session_state[state_key]


def _present_audio(wav_path):
    """Show the synthesized audio with a player and a download button."""
    st.success("Synthesis complete!")
    st.audio(str(wav_path))
    # Read the bytes up front so no open file handle is left behind.
    with open(wav_path, "rb") as wav_file:
        wav_bytes = wav_file.read()
    st.download_button("Download WAV", data=wav_bytes, file_name=wav_path.name)


if filetype == "MEI":
    st.header("1. Select MEI Source")
    use_demo = st.checkbox("Use demo MEI file", value=False)
    tempo = st.slider("Tempo (BPM)", 1, 300, 60)

    if use_demo:
        mei_path = DEMO_FILES / "Demo1.mei"
        if not mei_path.exists():
            st.error("Demo MEI file missing.")
            st.stop()
        with open(mei_path, "rb") as f:
            mei_file_bytes = f.read()
    else:
        mei_file = st.file_uploader("Upload your MEI file", type="mei")
        if not mei_file:
            st.stop()
        # The filename comes from the client: keep only its last component so
        # it cannot point outside the upload directory.
        mei_path = UPLOAD_MEI_DIR / _safe_upload_name(mei_file.name)
        with open(mei_path, "wb") as f:
            f.write(mei_file.getbuffer())
        mei_file_bytes = mei_file.getvalue()

    mei_text = mei_file_bytes.decode("utf-8")

    try:
        raw_notes = parse_mei_for_editor(mei_path, tempo)
    except Exception:
        handle_exception("MEI parsing")

    # Always update session state
    st.session_state.original_raw_notes = raw_notes

    # Default phoneme split for every note of the current score.
    syllable_groups = []
    for note in st.session_state.original_raw_notes:
        syllable_text = note["lyric"]
        pitch = note["pitch"]
        syllable = split_syllable(
            syllable=syllable_text,
            note_duration=note["duration"],
            tempo=tempo,
            pitch=pitch
        )
        syllable_groups.append({
            "syllable": syllable_text,
            "phonemes": syllable
        })

    # Start from the default split on the first run AND whenever a different
    # score is selected; otherwise the editor would keep showing (and
    # indexing into) the syllables of the previous file.
    mei_source_id = (str(mei_path), hash(mei_file_bytes))
    if (
        st.session_state.get("mei_source_id") != mei_source_id
        or "edited_syllables" not in st.session_state
    ):
        st.session_state.mei_source_id = mei_source_id
        st.session_state.edited_syllables = syllable_groups
        # The groups above were built with the current tempo already.
        st.session_state.previous_tempo = tempo
        _clear_editor_widget_state()

    st.subheader("Score Preview")
    # Embed the score as an escaped JS string literal. Pasting the raw text
    # into a template literal breaks (or injects script) as soon as the file
    # contains a backtick, "${" or "</script>".
    mei_js_literal = json.dumps(mei_text).replace("<", "\\u003c")
    components.html(
        '<div id="app" style="border: 1px solid lightgray; min-height: 400px;"></div>'
        '<script type="module">'
        "import 'https://editor.verovio.org/javascript/app/verovio-app.js';"
        'const app=new Verovio.App(document.getElementById("app"),'
        "{defaultView:'document',documentZoom:4});"
        f"app.loadData({mei_js_literal});"
        "</script>",
        height=500,
    )

    st.header("2. Edit Phonemes, Durations, and Pitches")
    updated_syllables = []

    if "previous_tempo" not in st.session_state:
        st.session_state.previous_tempo = tempo

    if tempo != st.session_state.previous_tempo:
        # Tempo changed: recompute the splits (and thus the durations) while
        # keeping the phoneme and pitch choices already made.
        for i, note in enumerate(st.session_state.original_raw_notes):
            updated = split_syllable(
                syllable=note["lyric"],
                note_duration=note["duration"],
                tempo=tempo,
                pitch=note["pitch"]
            )
            # preserve existing phoneme values (if possible)
            for j, ph in enumerate(updated):
                try:
                    existing = st.session_state.edited_syllables[i]["phonemes"][j]
                    ph["phoneme"] = existing["phoneme"]
                    ph["pitch"] = existing["pitch"]
                except IndexError:
                    pass  # new phoneme or longer split
            st.session_state.edited_syllables[i]["phonemes"] = updated
        st.session_state.previous_tempo = tempo

    for idx, group in enumerate(st.session_state.edited_syllables):
        st.markdown(f"#### {group['syllable'].capitalize()}")
        new_phonemes = []
        for j, ph in enumerate(group["phonemes"]):
            col1, col2, col3, col4 = st.columns([3, 3, 3, 1])  # last column holds the delete button
            with col1:
                shown_phoneme = phoneme_display_map.get(ph["phoneme"], ph["phoneme"])
                # Fall back to the first entry for an unlisted phoneme instead
                # of raising ValueError (same policy as the pitch box below).
                if shown_phoneme in full_phoneme_list_display:
                    phoneme_index = full_phoneme_list_display.index(shown_phoneme)
                else:
                    phoneme_index = 0
                phoneme_display = st.selectbox(
                    "Phoneme",
                    full_phoneme_list_display,
                    index=phoneme_index,
                    key=f"phoneme_{idx}_{j}"
                )
                phoneme_internal = display_to_phoneme.get(phoneme_display, phoneme_display)
            with col2:
                duration = st.number_input(
                    "Duration (seconds)",
                    min_value=0.0, max_value=_MAX_PHONEME_DURATION,
                    # An initial value above max_value is rejected by the
                    # widget, which happens for long notes at slow tempi.
                    value=min(float(ph["duration"]), _MAX_PHONEME_DURATION),
                    step=0.01, format="%.2f",
                    key=f"duration_num_{idx}_{j}"
                )
            with col3:
                pitch = st.selectbox(
                    "Pitch",
                    allowed_pitches,
                    index=allowed_pitches.index(ph["pitch"]) if ph["pitch"] in allowed_pitches else 0,
                    key=f"pitch_{idx}_{j}"
                )
            with col4:
                if st.button("❌", key=f"remove_{idx}_{j}"):
                    # The rerun aborts this pass immediately, so mutating the
                    # list that is being iterated is safe here.
                    group["phonemes"].pop(j)
                    _rerun()
            new_phonemes.append({"phoneme": phoneme_internal, "duration": duration, "pitch": pitch})
        if st.button("➕ Add Phoneme", key=f"add_phoneme_{idx}"):
            group["phonemes"].append({"phoneme": "a", "duration": 0.2, "pitch": "D4"})
            _rerun()
        updated_syllables.append({"syllable": group["syllable"], "phonemes": new_phonemes})
        st.divider()

    # Persist what the widgets currently show for the next run.
    st.session_state.edited_syllables = updated_syllables

    st.header("3. Synthesize")
    confirm_clicked = st.button("✅ Synthesize", key="confirm_button_mei")
    if confirm_clicked:
        ds_path = TMP_DS_DIR / f"{mei_path.stem}.ds"
        try:
            all_phonemes = [ph for syllable in st.session_state.edited_syllables for ph in syllable["phonemes"]]
            build_ds_from_notes(all_phonemes, ds_path)
            with open(ds_path, "r", encoding="utf-8") as f:
                ds_data = json.load(f)
            validate_ds(ds_data)
            st.success(f"DS file created: {ds_path.name}")
        except Exception:
            handle_exception("DS generation or validation")

        with st.spinner("Running DiffSinger inference…"):
            try:
                wav_path = run_inference(ds_path, OUTPUT_DIR, mei_path.stem)
            except Exception:
                handle_exception("inference")

        _present_audio(wav_path)
elif filetype == "DS":
    st.header("1. Upload DS File")
    ds_file = st.file_uploader("Upload your .ds file", type=["ds", "json"])

    st.header("2. Synthesize")
    synth_clicked = st.button("✅ Synthesize", key="synthesize_button_ds")
    if synth_clicked:
        if not ds_file:
            st.error("Please upload a .ds file.")
            st.stop()
        # Client-supplied filename: keep only its last component.
        ds_path = UPLOAD_DS_DIR / _safe_upload_name(ds_file.name)
        with open(ds_path, "wb") as f:
            f.write(ds_file.getbuffer())
        try:
            # Parsing sits inside the try so malformed JSON is reported as an
            # invalid DS file rather than as an unhandled exception.
            with open(ds_path, "r", encoding="utf-8") as f:
                ds_data = json.load(f)
            validate_ds(ds_data)
        except Exception as e:
            st.error(f"Invalid DS file: {e}")
            st.stop()

        with st.spinner("Running DiffSinger inference…"):
            try:
                wav_path = run_inference(ds_path, OUTPUT_DIR, ds_path.stem)
            except Exception:
                handle_exception("inference")

        _present_audio(wav_path)