Spaces:

sheep52031
/

mediatek-asr-test

Runtime error

App Files Files Community

sheep52031 committed on Sep 4

Commit

c989afe

verified ·

1 Parent(s): e3aaa9a

🎤 MediaTek ASR 台灣國語測試 Space 初始版本

Browse files

Files changed (1) hide show

app.py +97 -43

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """
 MediaTek Breeze-ASR-25 台灣國語識別測試 Space
 適用於 HuggingFace Zero GPU Spaces 部署
 """
 import gradio as gr
@@ -10,17 +11,20 @@ import torch
 import time
 import torchaudio
-# 全域模型變數
-asr_model = None
-@spaces.GPU(duration=60)
-def load_asr_model():
-    """載入 MediaTek ASR 模型"""
-    global asr_model
-    print("🔄 載入 MediaTek Breeze-ASR-25 模型...")
     try:
         asr_model = pipeline(
             "automatic-speech-recognition",
             model="MediaTek-Research/Breeze-ASR-25",
@@ -29,80 +33,130 @@ def load_asr_model():
             return_timestamps=True
         )
-        print("✅ MediaTek Breeze-ASR-25 載入成功")
-        return "✅ 模型載入完成"
-    except Exception as e:
-        print(f"❌ 模型載入失敗: {str(e)}")
-        return f"❌ 模型載入失敗: {str(e)}"
-# 載入模型
-load_status = load_asr_model()
-@spaces.GPU(duration=30)
-def transcribe_audio(audio_file):
-    """ASR 推論與效能測試"""
-    global asr_model
-    if audio_file is None:
-        return "❌ 請上傳音訊檔案", "", ""
-    if asr_model is None:
-        return "❌ 模型尚未載入", "", ""
-    start_time = time.time()
-    try:
         # 載入音訊檔案獲取長度
         waveform, sample_rate = torchaudio.load(audio_file)
         audio_duration = waveform.shape[1] / sample_rate
         # 執行 ASR 推論
         result = asr_model(audio_file)
-        # 計算處理時間
-        process_time = time.time() - start_time
-        rtf = process_time / audio_duration
         # 提取識別結果
         transcript = result["text"] if isinstance(result, dict) else str(result)
         # 格式化性能指標
-        performance = f"""⏱️ 處理時間: {process_time:.2f}s
 🎵 音訊長度: {audio_duration:.2f}s
 📈 RTF: {rtf:.3f} ({'實時' if rtf < 1.0 else '非實時'})
-💾 模型: MediaTek Breeze-ASR-25"""
         return transcript, performance, "✅ 識別成功"
     except Exception as e:
-        return f"❌ 處理失敗: {str(e)}", "", "❌ 處理失敗"
 # Gradio 界面
 with gr.Blocks(title="MediaTek ASR 台灣國語測試") as demo:
     gr.Markdown("# 🎤 MediaTek Breeze-ASR-25 台灣國語識別測試")
-    gr.Markdown(f"**模型狀態**: {load_status}")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(
                 type="filepath",
-                label="上傳音訊檔案 (wav, mp3, m4a)"
             )
-            submit_btn = gr.Button("🚀 開始識別", variant="primary")
         with gr.Column():
             transcript_output = gr.Textbox(
-                label="✨ 識別結果",
                 lines=5,
                 placeholder="識別結果將顯示在這裡..."
             )
             performance_output = gr.Textbox(
                 label="⚡ 性能指標",
-                lines=4
             )
-            status_output = gr.Textbox(label="📊 狀態")
     submit_btn.click(
         transcribe_audio,
         inputs=[audio_input],

 """
 MediaTek Breeze-ASR-25 台灣國語識別測試 Space
 適用於 HuggingFace Zero GPU Spaces 部署
+修復版：解決 ZeroGPU 會話間模型載入問題
 """
 import gradio as gr
 import time
 import torchaudio
+@spaces.GPU(duration=60)
+def transcribe_audio(audio_file):
+    """ASR 推論與效能測試 - 每次調用時載入模型"""
+    if audio_file is None:
+        return "❌ 請上傳音訊檔案", "", ""
+    start_total = time.time()
     try:
+        # 每次推論時載入模型（ZeroGPU 限制）
+        print("🔄 載入 MediaTek Breeze-ASR-25 模型...")
+        model_load_start = time.time()
         asr_model = pipeline(
             "automatic-speech-recognition",
             model="MediaTek-Research/Breeze-ASR-25",
             return_timestamps=True
         )
+        model_load_time = time.time() - model_load_start
+        print(f"✅ 模型載入完成 ({model_load_time:.2f}s)")
         # 載入音訊檔案獲取長度
         waveform, sample_rate = torchaudio.load(audio_file)
         audio_duration = waveform.shape[1] / sample_rate
         # 執行 ASR 推論
+        inference_start = time.time()
         result = asr_model(audio_file)
+        inference_time = time.time() - inference_start
+        # 計算總處理時間
+        total_time = time.time() - start_total
+        rtf = total_time / audio_duration
         # 提取識別結果
         transcript = result["text"] if isinstance(result, dict) else str(result)
+        # 檢查 GPU 記憶體使用
+        gpu_info = ""
+        if torch.cuda.is_available():
+            gpu_memory = torch.cuda.memory_allocated() / 1024**3
+            gpu_info = f"💾 GPU 記憶體: {gpu_memory:.2f}GB"
         # 格式化性能指標
+        performance = f"""⏱️ 總處理時間: {total_time:.2f}s
+🔄 模型載入時間: {model_load_time:.2f}s
+🎯 推論時間: {inference_time:.2f}s
 🎵 音訊長度: {audio_duration:.2f}s
 📈 RTF: {rtf:.3f} ({'實時' if rtf < 1.0 else '非實時'})
+💾 模型: MediaTek Breeze-ASR-25
+{gpu_info}"""
         return transcript, performance, "✅ 識別成功"
     except Exception as e:
+        error_msg = f"❌ 處理失敗: {str(e)}"
+        print(error_msg)
+        return error_msg, "", "❌ 處理失敗"
+def get_model_info():
+    """獲取模型資訊 (CPU 函數)"""
+    return """🤖 MediaTek Breeze-ASR-25 模型資訊:
+- 基於 Whisper 架構，專為台灣國語優化
+- 支援繁體中文語音識別
+- ZeroGPU 動態載入模式
+- 每次推論重新載入以確保穩定性"""
 # Gradio 界面
 with gr.Blocks(title="MediaTek ASR 台灣國語測試") as demo:
     gr.Markdown("# 🎤 MediaTek Breeze-ASR-25 台灣國語識別測試")
+    gr.Markdown("**專為台灣國語優化的語音識別測試平台**")
+    # 模型資訊顯示
+    with gr.Accordion("🤖 模型資訊", open=False):
+        model_info = gr.Textbox(
+            value=get_model_info(),
+            label="模型詳細資訊",
+            lines=6,
+            interactive=False
+        )
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### 🎙️ 音訊輸入")
             audio_input = gr.Audio(
                 type="filepath",
+                label="上傳音訊檔案 (wav, mp3, m4a)",
+                format="wav"
             )
+            gr.Markdown("### 📋 測試說明")
+            gr.Markdown("""
+            - 🎯 上傳 5-60 秒的台灣國語音訊
+            - 🔊 建議使用清晰、低噪音的錄音
+            - ⚡ 每次識別會重新載入模型 (ZeroGPU 限制)
+            - 📊 系統會顯示詳細的性能指標
+            """)
+            submit_btn = gr.Button("🚀 開始識別", variant="primary", size="lg")
         with gr.Column():
+            gr.Markdown("### 📄 識別結果")
             transcript_output = gr.Textbox(
+                label="✨ 識別文字",
                 lines=5,
                 placeholder="識別結果將顯示在這裡..."
             )
             performance_output = gr.Textbox(
                 label="⚡ 性能指標",
+                lines=8,
+                placeholder="性能數據將顯示在這裡..."
             )
+            status_output = gr.Textbox(
+                label="📊 處理狀態",
+                lines=2
+            )
+    # 使用範例
+    with gr.Accordion("📖 使用範例與 API", open=False):
+        gr.Markdown("""
+        ## 🔗 Gradio Client API 使用
+        ```python
+        from gradio_client import Client
+        client = Client("sheep52031/mediatek-asr-test")
+        result = client.predict("audio_file.wav", api_name="/predict")
+        transcript = result[0]    # 識別文字
+        performance = result[1]   # 性能指標
+        status = result[2]        # 處理狀態
+        ```
+        ## 📊 評估指標
+        - **RTF < 1.0**: 實時處理能力
+        - **準確度**: 台灣國語識別正確率
+        - **處理時間**: 總耗時包含模型載入
+        """)
+    # 事件綁定
     submit_btn.click(
         transcribe_audio,
         inputs=[audio_input],