Spaces:
Runtime error
Runtime error
| """ | |
| MediaTek Breeze-ASR-25 台灣國語識別測試 Space | |
| 適用於 HuggingFace Zero GPU Spaces 部署 | |
| 修復版:解決 ZeroGPU 會話間模型載入問題 | |
| """ | |
| import gradio as gr | |
| import spaces | |
| from transformers import pipeline | |
| import torch | |
| import time | |
| import torchaudio | |
# ZeroGPU only attaches a CUDA device while a function decorated with
# ``spaces.GPU`` is running; this function requests ``device="cuda"``.
@spaces.GPU
def transcribe_audio(audio_file):
    """Transcribe an audio file with Breeze-ASR-25 and report timing metrics.

    The ASR pipeline is built inside every call (the loading strategy this
    file chose for ZeroGPU), so the reported total time and the RTF include
    the model load time.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            no file was provided.

    Returns:
        A 3-tuple of strings ``(transcript, performance, status)``. On
        failure the first element carries the error message and the
        performance element is empty.
    """
    if audio_file is None:
        return "❌ 請上傳音訊檔案", "", ""

    start_total = time.time()
    try:
        # Read the audio before loading the model so that an unreadable or
        # empty file fails fast instead of after the expensive model load.
        waveform, sample_rate = torchaudio.load(audio_file)
        audio_duration = waveform.shape[1] / sample_rate
        if audio_duration <= 0:
            # Avoids a ZeroDivisionError in the RTF computation below.
            return "❌ 處理失敗: 音訊長度為 0", "", "❌ 處理失敗"

        # Build the pipeline for this call and time the load separately.
        print("🔄 載入 MediaTek Breeze-ASR-25 模型...")
        model_load_start = time.time()
        asr_model = pipeline(
            "automatic-speech-recognition",
            model="MediaTek-Research/Breeze-ASR-25",
            torch_dtype=torch.float16,
            device="cuda",
            return_timestamps=True,
        )
        model_load_time = time.time() - model_load_start
        print(f"✅ 模型載入完成 ({model_load_time:.2f}s)")

        # Run the recognition itself.
        inference_start = time.time()
        result = asr_model(audio_file)
        inference_time = time.time() - inference_start

        # Real-time factor is based on the total wall time (load + inference).
        total_time = time.time() - start_total
        rtf = total_time / audio_duration

        # The pipeline normally returns a dict; fall back to str() otherwise.
        transcript = result["text"] if isinstance(result, dict) else str(result)

        # Report currently allocated GPU memory when CUDA is visible.
        gpu_info = ""
        if torch.cuda.is_available():
            gpu_memory = torch.cuda.memory_allocated() / 1024**3
            gpu_info = f"💾 GPU 記憶體: {gpu_memory:.2f}GB"

        # Joined line by line so the text does not depend on source indentation.
        realtime_label = "實時" if rtf < 1.0 else "非實時"
        performance = "\n".join(
            [
                f"⏱️ 總處理時間: {total_time:.2f}s",
                f"🔄 模型載入時間: {model_load_time:.2f}s",
                f"🎯 推論時間: {inference_time:.2f}s",
                f"🎵 音訊長度: {audio_duration:.2f}s",
                f"📈 RTF: {rtf:.3f} ({realtime_label})",
                "💾 模型: MediaTek Breeze-ASR-25",
                gpu_info,
            ]
        )
        return transcript, performance, "✅ 識別成功"
    except Exception as e:
        # UI boundary: surface the failure in the output boxes rather than
        # letting the exception propagate to Gradio.
        error_msg = f"❌ 處理失敗: {str(e)}"
        print(error_msg)
        return error_msg, "", "❌ 處理失敗"
def get_model_info():
    """Return the static model description shown in the UI (no GPU needed)."""
    description_lines = (
        "🤖 MediaTek Breeze-ASR-25 模型資訊:",
        "- 基於 Whisper 架構,專為台灣國語優化",
        "- 支援繁體中文語音識別",
        "- ZeroGPU 動態載入模式",
        "- 每次推論重新載入以確保穩定性",
    )
    return "\n".join(description_lines)
# Static Markdown shown in the UI. Defined at module level so the text sits
# at column 0 and is not affected by the indentation of the layout code.
_TEST_NOTES_MD = """
- 🎯 上傳 5-60 秒的台灣國語音訊
- 🔊 建議使用清晰、低噪音的錄音
- ⚡ 每次識別會重新載入模型 (ZeroGPU 限制)
- 📊 系統會顯示詳細的性能指標
"""

_API_USAGE_MD = """
## 🔗 Gradio Client API 使用
```python
from gradio_client import Client
client = Client("sheep52031/mediatek-asr-test")
result = client.predict("audio_file.wav", api_name="/predict")
transcript = result[0] # 識別文字
performance = result[1] # 性能指標
status = result[2] # 處理狀態
```
## 📊 評估指標
- **RTF < 1.0**: 實時處理能力
- **準確度**: 台灣國語識別正確率
- **處理時間**: 總耗時包含模型載入
"""

# Gradio interface
with gr.Blocks(title="MediaTek ASR 台灣國語測試") as demo:
    gr.Markdown("# 🎤 MediaTek Breeze-ASR-25 台灣國語識別測試")
    gr.Markdown("**專為台灣國語優化的語音識別測試平台**")

    # Collapsible panel with the static model description.
    with gr.Accordion("🤖 模型資訊", open=False):
        model_info = gr.Textbox(
            value=get_model_info(),
            label="模型詳細資訊",
            lines=6,
            interactive=False,
        )

    with gr.Row():
        # Left column: audio input, usage notes and the submit button.
        with gr.Column():
            gr.Markdown("### 🎙️ 音訊輸入")
            audio_input = gr.Audio(
                type="filepath",
                label="上傳音訊檔案 (wav, mp3, m4a)",
                format="wav",
            )
            gr.Markdown("### 📋 測試說明")
            gr.Markdown(_TEST_NOTES_MD)
            submit_btn = gr.Button("🚀 開始識別", variant="primary", size="lg")

        # Right column: transcript, performance metrics and status.
        with gr.Column():
            gr.Markdown("### 📄 識別結果")
            transcript_output = gr.Textbox(
                label="✨ 識別文字",
                lines=5,
                placeholder="識別結果將顯示在這裡...",
            )
            performance_output = gr.Textbox(
                label="⚡ 性能指標",
                lines=8,
                placeholder="性能數據將顯示在這裡...",
            )
            status_output = gr.Textbox(
                label="📊 處理狀態",
                lines=2,
            )

    # Usage example and evaluation notes.
    with gr.Accordion("📖 使用範例與 API", open=False):
        gr.Markdown(_API_USAGE_MD)

    # Event binding. The endpoint is named explicitly so that the client
    # example above (api_name="/predict") refers to an endpoint that exists;
    # without api_name the endpoint name would be derived from the handler.
    submit_btn.click(
        transcribe_audio,
        inputs=[audio_input],
        outputs=[transcript_output, performance_output, status_output],
        api_name="predict",
    )

if __name__ == "__main__":
    demo.launch()