Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,7 @@ except ImportError as e:
|
|
| 17 |
raise ImportError("Could not import SNAC. Make sure 'snac' is listed in requirements.txt and installed correctly.") from e
|
| 18 |
|
| 19 |
# --- Configuration ---
|
| 20 |
-
TARGET_SR =
|
| 21 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 22 |
print(f"Using device: {DEVICE}")
|
| 23 |
|
|
@@ -26,7 +26,7 @@ snac_model = None
|
|
| 26 |
try:
|
| 27 |
print("Loading SNAC model...")
|
| 28 |
start_time = time.time()
|
| 29 |
-
snac_model = SNAC.from_pretrained("hubertsiuzdak/
|
| 30 |
snac_model = snac_model.to(DEVICE)
|
| 31 |
snac_model.eval() # Set model to evaluation mode
|
| 32 |
end_time = time.time()
|
|
@@ -80,7 +80,7 @@ def process_audio(audio_filepath):
|
|
| 80 |
waveform_to_encode = resampler(original_waveform)
|
| 81 |
logs.append(f"Resampling complete. New Shape: {waveform_to_encode.shape}")
|
| 82 |
else:
|
| 83 |
-
logs.append("Waveform is already at the target sample rate (
|
| 84 |
waveform_to_encode = original_waveform
|
| 85 |
resample_end = time.time()
|
| 86 |
logs.append(f"Resampling time: {resample_end - resample_start:.2f}s")
|
|
@@ -141,12 +141,12 @@ def process_audio(audio_filepath):
|
|
| 141 |
|
| 142 |
# --- Gradio Interface ---
|
| 143 |
DESCRIPTION = """
|
| 144 |
-
This Space demonstrates the **SNAC (Scalable Neural Audio Codec)** model (`hubertsiuzdak/
|
| 145 |
1. Upload an audio file (wav, mp3, flac, etc.).
|
| 146 |
-
2. The audio will be automatically resampled to
|
| 147 |
-
3. The
|
| 148 |
4. These codes are then decoded back into audio by SNAC.
|
| 149 |
-
5. You can listen to the original, the
|
| 150 |
|
| 151 |
**Note:** Processing happens on the server. Larger files will take longer. If the input is stereo, only the first channel is processed.
|
| 152 |
"""
|
|
@@ -156,11 +156,11 @@ iface = gr.Interface(
|
|
| 156 |
inputs=gr.Audio(type="filepath", label="Upload Audio File"),
|
| 157 |
outputs=[
|
| 158 |
gr.Audio(label="Original Audio"),
|
| 159 |
-
gr.Audio(label="Resampled Audio (
|
| 160 |
gr.Audio(label="Reconstructed Audio (Output from SNAC)"),
|
| 161 |
gr.Textbox(label="Log Output", lines=15)
|
| 162 |
],
|
| 163 |
-
title="SNAC Audio Codec Demo (
|
| 164 |
description=DESCRIPTION,
|
| 165 |
examples=[
|
| 166 |
# Add paths to example audio files if you upload some to your Space repo
|
|
|
|
| 17 |
raise ImportError("Could not import SNAC. Make sure 'snac' is listed in requirements.txt and installed correctly.") from e
|
| 18 |
|
| 19 |
# --- Configuration ---
|
| 20 |
+
TARGET_SR = 32000 # SNAC operates at 32kHz
|
| 21 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 22 |
print(f"Using device: {DEVICE}")
|
| 23 |
|
|
|
|
| 26 |
try:
|
| 27 |
print("Loading SNAC model...")
|
| 28 |
start_time = time.time()
|
| 29 |
+
snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_32khz")
|
| 30 |
snac_model = snac_model.to(DEVICE)
|
| 31 |
snac_model.eval() # Set model to evaluation mode
|
| 32 |
end_time = time.time()
|
|
|
|
| 80 |
waveform_to_encode = resampler(original_waveform)
|
| 81 |
logs.append(f"Resampling complete. New Shape: {waveform_to_encode.shape}")
|
| 82 |
else:
|
| 83 |
+
logs.append("Waveform is already at the target sample rate (32kHz).")
|
| 84 |
waveform_to_encode = original_waveform
|
| 85 |
resample_end = time.time()
|
| 86 |
logs.append(f"Resampling time: {resample_end - resample_start:.2f}s")
|
|
|
|
| 141 |
|
| 142 |
# --- Gradio Interface ---
|
| 143 |
DESCRIPTION = """
|
| 144 |
+
This Space demonstrates the **SNAC (Scalable Neural Audio Codec)** model (`hubertsiuzdak/snac_32khz`).
|
| 145 |
1. Upload an audio file (wav, mp3, flac, etc.).
|
| 146 |
+
2. The audio will be automatically resampled to 32kHz if needed.
|
| 147 |
+
3. The 32kHz audio is encoded into discrete codes by SNAC.
|
| 148 |
4. These codes are then decoded back into audio by SNAC.
|
| 149 |
+
5. You can listen to the original, the 32kHz version (if resampled), and the final reconstructed audio.
|
| 150 |
|
| 151 |
**Note:** Processing happens on the server. Larger files will take longer. If the input is stereo, only the first channel is processed.
|
| 152 |
"""
|
|
|
|
| 156 |
inputs=gr.Audio(type="filepath", label="Upload Audio File"),
|
| 157 |
outputs=[
|
| 158 |
gr.Audio(label="Original Audio"),
|
| 159 |
+
gr.Audio(label="Resampled Audio (32kHz Input to SNAC)"),
|
| 160 |
gr.Audio(label="Reconstructed Audio (Output from SNAC)"),
|
| 161 |
gr.Textbox(label="Log Output", lines=15)
|
| 162 |
],
|
| 163 |
+
title="SNAC Audio Codec Demo (32kHz)",
|
| 164 |
description=DESCRIPTION,
|
| 165 |
examples=[
|
| 166 |
# Add paths to example audio files if you upload some to your Space repo
|