whisper-v3-zero-dev

Runtime error

App Files Files Community

devilent2 committed on Apr 7, 2024

Commit

d7e5659

verified ·

1 Parent(s): c67e757

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -1

app.py CHANGED Viewed

@@ -6,6 +6,10 @@ import gradio as gr
 import spaces
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 DEFAULT_MODEL_NAME = "distil-whisper/distil-large-v3"
 BATCH_SIZE = 8
@@ -140,6 +144,23 @@ def handle_upload_audio(audio_path,model_name,old_transcription=''):
     (text,transcription_time_output)=transcribe(audio_path,model_name)
     return text+'\n\n'+old_transcription, transcription_time_output
 graudio=gr.Audio(type="filepath",show_download_button=True)
 grmodel_textbox=gr.Textbox(
             label="Model Name",
@@ -172,7 +193,20 @@ demo = gr.Blocks()
 with demo:
-    gr.TabbedInterface([mf_transcribe, ], ["Audio",])
     with gr.Row():
         refresh_button = gr.Button("Refresh Status")  # Create a refresh button

 import spaces
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
+import base64
+import requests
 DEFAULT_MODEL_NAME = "distil-whisper/distil-large-v3"
 BATCH_SIZE = 8
     (text,transcription_time_output)=transcribe(audio_path,model_name)
     return text+'\n\n'+old_transcription, transcription_time_output
+def handle_base64_audio(base64_data, model_name, old_transcription=''):
+    """Transcribe audio supplied as base64 text or as a base64 data URL.
+
+    Args:
+        base64_data: Base64-encoded audio, either the bare payload or a
+            full data URL such as ``data:audio/wav;base64,<payload>``.
+        model_name: Model identifier forwarded unchanged to ``transcribe``.
+        old_transcription: Earlier text; the new text is placed before it.
+
+    Returns:
+        A ``(combined_text, transcription_time_output)`` tuple, where
+        ``combined_text`` is the new text, a blank line, then
+        ``old_transcription``.
+
+    Raises:
+        ValueError: If no payload is supplied, or the payload cannot be
+            decoded, e.g. incorrect padding (``binascii.Error`` is a
+            ``ValueError`` subclass).
+    """
+    import os
+    import tempfile
+
+    payload = (base64_data or '').strip()
+    # b64decode() silently drops characters outside the base64 alphabet, so a
+    # leftover "data:<mime>;base64," header would be decoded into garbage
+    # bytes in front of the audio. Strip the header first.
+    if payload.startswith('data:'):
+        _header, _sep, payload = payload.partition(',')
+    if not payload:
+        raise ValueError("No base64 audio data provided")
+    binary_data = base64.b64decode(payload)
+
+    # Use a unique temp file per call: a fixed name in the working directory
+    # would let concurrent requests overwrite or delete each other's audio.
+    fd, audio_path = tempfile.mkstemp(suffix=".wav")
+    try:
+        with os.fdopen(fd, "wb") as f:
+            f.write(binary_data)
+        text, transcription_time_output = transcribe(audio_path, model_name)
+    finally:
+        # Clean up even when writing or transcription fails.
+        try:
+            os.remove(audio_path)
+        except FileNotFoundError:
+            pass
+    return text + '\n\n' + old_transcription, transcription_time_output
 graudio=gr.Audio(type="filepath",show_download_button=True)
 grmodel_textbox=gr.Textbox(
             label="Model Name",
 with demo:
+    # Two tabs: the existing audio interface, plus one that accepts base64
+    # audio pasted as text. This statement must use the same indentation as
+    # the `with gr.Row():` line that follows it; otherwise Python rejects the
+    # `with demo:` body with an IndentationError.
+    gr.TabbedInterface(
+        [
+            mf_transcribe,
+            gr.Interface(
+                fn=handle_base64_audio,
+                inputs=[
+                    gr.Textbox(label="Base64 Audio Data URL", placeholder="Enter the base64 audio data URL"),
+                    grmodel_textbox,
+                ],
+                outputs=groutputs,
+            ),
+        ],
+        ["Audio", "Base64 Audio Data URL"],
+    )
     with gr.Row():
         refresh_button = gr.Button("Refresh Status")  # Create a refresh button