"""
MediaTek Breeze-ASR-25 台灣國語識別測試 Space
適用於 HuggingFace Zero GPU Spaces 部署
修復版:解決 ZeroGPU 會話間模型載入問題
"""
import gradio as gr
import spaces
from transformers import pipeline
import torch
import time
import torchaudio
@spaces.GPU(duration=60)
def transcribe_audio(audio_file):
"""ASR 推論與效能測試 - 每次調用時載入模型"""
if audio_file is None:
return "❌ 請上傳音訊檔案", "", ""
start_total = time.time()
try:
# 每次推論時載入模型(ZeroGPU 限制)
print("🔄 載入 MediaTek Breeze-ASR-25 模型...")
model_load_start = time.time()
asr_model = pipeline(
"automatic-speech-recognition",
model="MediaTek-Research/Breeze-ASR-25",
torch_dtype=torch.float16,
device="cuda",
return_timestamps=True
)
model_load_time = time.time() - model_load_start
print(f"✅ 模型載入完成 ({model_load_time:.2f}s)")
# 載入音訊檔案獲取長度
waveform, sample_rate = torchaudio.load(audio_file)
audio_duration = waveform.shape[1] / sample_rate
# 執行 ASR 推論
inference_start = time.time()
result = asr_model(audio_file)
inference_time = time.time() - inference_start
# 計算總處理時間
total_time = time.time() - start_total
rtf = total_time / audio_duration
# 提取識別結果
transcript = result["text"] if isinstance(result, dict) else str(result)
# 檢查 GPU 記憶體使用
gpu_info = ""
if torch.cuda.is_available():
gpu_memory = torch.cuda.memory_allocated() / 1024**3
gpu_info = f"💾 GPU 記憶體: {gpu_memory:.2f}GB"
# 格式化性能指標
performance = f"""⏱️ 總處理時間: {total_time:.2f}s
🔄 模型載入時間: {model_load_time:.2f}s
🎯 推論時間: {inference_time:.2f}s
🎵 音訊長度: {audio_duration:.2f}s
📈 RTF: {rtf:.3f} ({'實時' if rtf < 1.0 else '非實時'})
💾 模型: MediaTek Breeze-ASR-25
{gpu_info}"""
return transcript, performance, "✅ 識別成功"
except Exception as e:
error_msg = f"❌ 處理失敗: {str(e)}"
print(error_msg)
return error_msg, "", "❌ 處理失敗"
def get_model_info():
"""獲取模型資訊 (CPU 函數)"""
return """🤖 MediaTek Breeze-ASR-25 模型資訊:
- 基於 Whisper 架構,專為台灣國語優化
- 支援繁體中文語音識別
- ZeroGPU 動態載入模式
- 每次推論重新載入以確保穩定性"""
# Gradio interface
with gr.Blocks(title="MediaTek ASR Taiwanese Mandarin Test") as demo:
    gr.Markdown("# 🎤 MediaTek Breeze-ASR-25 Taiwanese Mandarin ASR Test")
    gr.Markdown("**A speech recognition test platform optimized for Taiwanese Mandarin**")

    # Model information panel
    with gr.Accordion("🤖 Model Information", open=False):
        model_info = gr.Textbox(
            value=get_model_info(),
            label="Model details",
            lines=6,
            interactive=False
        )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎙️ Audio Input")
            audio_input = gr.Audio(
                type="filepath",
                label="Upload an audio file (wav, mp3, m4a)",
                format="wav"
            )
            gr.Markdown("### 📋 Test Notes")
            gr.Markdown("""
- 🎯 Upload 5-60 seconds of Taiwanese Mandarin audio
- 🔊 Clear, low-noise recordings are recommended
- ⚡ The model is reloaded for every recognition run (ZeroGPU constraint)
- 📊 Detailed performance metrics are reported
""")
            submit_btn = gr.Button("🚀 Start Recognition", variant="primary", size="lg")
        with gr.Column():
            gr.Markdown("### 📄 Recognition Results")
            transcript_output = gr.Textbox(
                label="✨ Transcript",
                lines=5,
                placeholder="The transcript will appear here..."
            )
            performance_output = gr.Textbox(
                label="⚡ Performance Metrics",
                lines=8,
                placeholder="Performance data will appear here..."
            )
            status_output = gr.Textbox(
                label="📊 Processing Status",
                lines=2
            )
    # Usage example
    with gr.Accordion("📖 Usage Example & API", open=False):
        gr.Markdown("""
## 🔗 Gradio Client API usage

```python
from gradio_client import Client

client = Client("sheep52031/mediatek-asr-test")
result = client.predict("audio_file.wav", api_name="/predict")
transcript = result[0]   # transcript text
performance = result[1]  # performance metrics
status = result[2]       # processing status
```

## 📊 Evaluation metrics
- **RTF < 1.0**: capable of real-time processing
- **Accuracy**: recognition accuracy for Taiwanese Mandarin
- **Processing time**: total elapsed time, including model loading
""")
    # Event binding
    submit_btn.click(
        transcribe_audio,
        inputs=[audio_input],
        outputs=[transcript_output, performance_output, status_output],
        api_name="/predict"  # keep the endpoint name consistent with the client example above
    )
if __name__ == "__main__":
    demo.launch()
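# Deployment note (a minimal sketch, inferred from the imports above): the Space's
# requirements.txt is assumed to list at least the packages below. Exact versions
# are not given in this file, so they are left unpinned here.
#
#   gradio
#   spaces        # ZeroGPU helper that provides the @spaces.GPU decorator
#   transformers
#   torch
#   torchaudio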