Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import torch | |
| import librosa | |
| import numpy as np | |
| from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification | |
| import torchaudio | |
# Emoji shown next to each predicted emotion label.
# Labels without an entry here are rendered without an emoji by the caller.
EMOTION_EMOJI = dict(
    angry="😠",
    happy="😄",
    sad="😢",
    neutral="😐",
)
# Hugging Face Hub checkpoint used for speech-emotion classification.
MODEL_ID = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"


@st.cache_resource
def _load_emotion_model(checkpoint: str = MODEL_ID):
    """Load the audio preprocessor and the classifier for *checkpoint*.

    Streamlit re-executes this script from the top on every user
    interaction; ``st.cache_resource`` keeps a single loaded copy of the
    objects so the checkpoint is not reloaded on each rerun.

    Args:
        checkpoint: Hub model id (or local path) to load from.

    Returns:
        A ``(preprocessor, classifier)`` tuple.
    """
    # Imported locally so this block does not depend on edits to the
    # file-level import list.
    from transformers import Wav2Vec2FeatureExtractor

    # Only raw-audio preprocessing is needed for classification.
    # Wav2Vec2Processor additionally loads a CTC tokenizer, which this
    # script never uses and which a classification-only checkpoint may not
    # ship; the feature extractor accepts the same call arguments.
    feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(checkpoint)
    classifier = Wav2Vec2ForSequenceClassification.from_pretrained(checkpoint)
    return feature_extractor, classifier


# Module-level names kept as-is: the rest of the script refers to them.
processor, model = _load_emotion_model()
# Sample rate (Hz) passed to the processor; audio at any other rate is
# resampled to this before inference.
TARGET_SAMPLE_RATE = 16000

# Title
st.title("🎙️ Voice Emotion Detector with Emoji")

# Upload audio
uploaded_file = st.file_uploader("Upload a WAV file", type=["wav"])

if uploaded_file is not None:
    st.audio(uploaded_file, format="audio/wav")

    # Rewind in case an earlier reader advanced the stream position.
    uploaded_file.seek(0)

    # Load and preprocess audio. torchaudio returns the waveform as
    # (channels, frames) by default.
    speech_array, sampling_rate = torchaudio.load(uploaded_file)

    # Downmix multi-channel audio to mono. A stereo file would otherwise be
    # passed on as a 2-D array and read as several separate utterances,
    # yielding one row of logits per channel.
    if speech_array.size(0) > 1:
        speech_array = speech_array.mean(dim=0, keepdim=True)

    if sampling_rate != TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sampling_rate,
            new_freq=TARGET_SAMPLE_RATE,
        )
        speech_array = resampler(speech_array)

    # Drop the (now single) channel axis to get a 1-D waveform.
    speech = speech_array.squeeze(0).numpy()

    inputs = processor(
        speech,
        sampling_rate=TARGET_SAMPLE_RATE,
        return_tensors="pt",
        padding=True,
    )

    with torch.no_grad():
        logits = model(**inputs).logits

    # Take the argmax along the class axis so the index is always a valid
    # class id rather than a position in the flattened logits tensor.
    predicted_class_id = torch.argmax(logits, dim=-1).item()
    emotion = model.config.id2label[predicted_class_id]

    st.markdown(f"### Emotion Detected: **{emotion}** {EMOTION_EMOJI.get(emotion, '')}")