badrex committed on
Commit
6ebefda
·
verified ·
1 Parent(s): 08eda6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import gradio as gr
3
  import spaces
4
  import torch
@@ -33,7 +34,16 @@ def process_audio(audio_path):
33
  if not audio_path:
34
  return "Please upload an audio file."
35
 
36
- inputs = inputs = processor(audio_path, sampling_rate=16000, return_tensors="pt")
 
 
 
 
 
 
 
 
 
37
  inputs = inputs.to(device, dtype=torch.bfloat16)
38
 
39
  with torch.no_grad():
@@ -48,7 +58,7 @@ def process_audio(audio_path):
48
 
49
  # Define Gradio interface
50
  with gr.Blocks(title="Voxtral Demo") as demo:
51
- gr.Markdown("<div>JASR v1.1 🎙️ <br>Speech Recognition for Dialectal Arabic</div>")
52
  gr.Markdown("Upload an audio file and get a transcription from JASR v1.1.")
53
 
54
  with gr.Row():
 
1
  import os
2
+ import torchaudio
3
  import gradio as gr
4
  import spaces
5
  import torch
 
34
  if not audio_path:
35
  return "Please upload an audio file."
36
 
37
+ # get audio array
38
+ audio_array, sample_rate = torchaudio.load(audio_path)
39
+
40
+ # if sample rate is not 16000, resample to 16000
41
+ if sample_rate != 16000:
42
+ audio_array = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(audio_array)
43
+
44
+ audio_array = audio_array.to(device)
45
+
46
+ inputs = inputs = processor(audio_array, sampling_rate=16000, return_tensors="pt")
47
  inputs = inputs.to(device, dtype=torch.bfloat16)
48
 
49
  with torch.no_grad():
 
58
 
59
  # Define Gradio interface
60
  with gr.Blocks(title="Voxtral Demo") as demo:
61
+ gr.Markdown("#JASR v1.1 🎙️ <Speech Recognition for Dialectal Arabic")
62
  gr.Markdown("Upload an audio file and get a transcription from JASR v1.1.")
63
 
64
  with gr.Row():