Spaces:

sheep52031
/

mediatek-asr-test

Runtime error

App Files Files Community

mediatek-asr-test / app.py

sheep52031

🎤 MediaTek ASR 台灣國語測試 Space 初始版本

c989afe verified 3 months ago

raw

history blame contribute delete

5.53 kB

	"""
	MediaTek Breeze-ASR-25 台灣國語識別測試 Space
	適用於 HuggingFace Zero GPU Spaces 部署
	修復版：解決 ZeroGPU 會話間模型載入問題
	"""

	import gradio as gr
	import spaces
	from transformers import pipeline
	import torch
	import time
	import torchaudio

	@spaces.GPU(duration=60)
	def transcribe_audio(audio_file):
	    """Transcribe an audio file with Breeze-ASR-25 and report timing metrics.

	    The ASR pipeline is created inside this function on every call, so each
	    invocation pays the model-loading cost. That cost is part of the
	    reported total time and therefore of the reported RTF.

	    Args:
	        audio_file: Path of the audio file to transcribe, or ``None`` when
	            nothing was uploaded.

	    Returns:
	        A 3-tuple of strings ``(transcript, performance, status)``. When
	        processing fails, the first element carries the error message and
	        the performance element is an empty string. No exception is
	        propagated to the caller.
	    """
	    if audio_file is None:
	        return "❌ 請上傳音訊檔案", "", ""

	    start_total = time.time()

	    try:
	        # Probe the audio before loading the model so unreadable or empty
	        # input fails fast instead of after the expensive model load.
	        # torchaudio.load returns (channels, frames) by default, so
	        # shape[1] is the number of frames.
	        waveform, sample_rate = torchaudio.load(audio_file)
	        audio_duration = waveform.shape[1] / sample_rate
	        if audio_duration <= 0:
	            # RTF divides by the duration; reject zero-length audio with a
	            # clear message instead of a "float division by zero" error.
	            raise ValueError("音訊長度為 0，無法進行識別")

	        # The model is loaded on every inference call (ZeroGPU limitation).
	        print("🔄 載入 MediaTek Breeze-ASR-25 模型...")
	        model_load_start = time.time()

	        asr_model = pipeline(
	            "automatic-speech-recognition",
	            model="MediaTek-Research/Breeze-ASR-25",
	            torch_dtype=torch.float16,
	            device="cuda",
	            return_timestamps=True
	        )

	        model_load_time = time.time() - model_load_start
	        print(f"✅ 模型載入完成 ({model_load_time:.2f}s)")

	        # Run ASR inference.
	        inference_start = time.time()
	        result = asr_model(audio_file)
	        inference_time = time.time() - inference_start

	        # Total wall time covers the audio probe, model load and inference;
	        # RTF is that total divided by the audio length.
	        total_time = time.time() - start_total
	        rtf = total_time / audio_duration

	        # Extract the recognized text; fall back to str() for non-dict results.
	        transcript = result["text"] if isinstance(result, dict) else str(result)

	        # Report currently allocated GPU memory in GiB when CUDA is available.
	        gpu_info = ""
	        if torch.cuda.is_available():
	            gpu_memory = torch.cuda.memory_allocated() / 1024**3
	            gpu_info = f"💾 GPU 記憶體: {gpu_memory:.2f}GB"

	        # Format the performance metrics shown in the UI.
	        performance = f"""⏱️ 總處理時間: {total_time:.2f}s
	🔄 模型載入時間: {model_load_time:.2f}s
	🎯 推論時間: {inference_time:.2f}s
	🎵 音訊長度: {audio_duration:.2f}s
	📈 RTF: {rtf:.3f} ({'實時' if rtf < 1.0 else '非實時'})
	💾 模型: MediaTek Breeze-ASR-25
	{gpu_info}"""

	        return transcript, performance, "✅ 識別成功"

	    except Exception as e:
	        # UI boundary: report any failure as text in the output boxes
	        # rather than letting the exception escape.
	        error_msg = f"❌ 處理失敗: {e}"
	        print(error_msg)
	        return error_msg, "", "❌ 處理失敗"

	def get_model_info():
	    """Return the fixed, human-readable description of the ASR model.

	    Takes no arguments and loads nothing; it is a plain function without
	    the GPU decorator and simply returns a constant multi-line string.
	    """
	    model_summary = """🤖 MediaTek Breeze-ASR-25 模型資訊:
	- 基於 Whisper 架構，專為台灣國語優化
	- 支援繁體中文語音識別
	- ZeroGPU 動態載入模式
	- 每次推論重新載入以確保穩定性"""
	    return model_summary

	# Gradio UI: an audio upload on the left, recognition results on the right,
	# plus collapsible model-info and API-usage sections.
	# NOTE(review): the layout nesting below was reconstructed from a source
	# whose indentation was flattened — confirm against the deployed app.
	with gr.Blocks(title="MediaTek ASR 台灣國語測試") as demo:
	    gr.Markdown("# 🎤 MediaTek Breeze-ASR-25 台灣國語識別測試")
	    gr.Markdown("專為台灣國語優化的語音識別測試平台")

	    # Model information display.
	    with gr.Accordion("🤖 模型資訊", open=False):
	        model_info = gr.Textbox(
	            value=get_model_info(),
	            label="模型詳細資訊",
	            lines=6,
	            interactive=False
	        )

	    with gr.Row():
	        # Left column: audio input, instructions and the submit button.
	        with gr.Column():
	            gr.Markdown("### 🎙️ 音訊輸入")
	            audio_input = gr.Audio(
	                type="filepath",
	                label="上傳音訊檔案 (wav, mp3, m4a)",
	                format="wav"
	            )

	            gr.Markdown("### 📋 測試說明")
	            gr.Markdown("""
	- 🎯 上傳 5-60 秒的台灣國語音訊
	- 🔊 建議使用清晰、低噪音的錄音
	- ⚡ 每次識別會重新載入模型 (ZeroGPU 限制)
	- 📊 系統會顯示詳細的性能指標
	""")

	            submit_btn = gr.Button("🚀 開始識別", variant="primary", size="lg")

	        # Right column: transcript, performance metrics and status.
	        with gr.Column():
	            gr.Markdown("### 📄 識別結果")
	            transcript_output = gr.Textbox(
	                label="✨ 識別文字",
	                lines=5,
	                placeholder="識別結果將顯示在這裡..."
	            )

	            performance_output = gr.Textbox(
	                label="⚡ 性能指標",
	                lines=8,
	                placeholder="性能數據將顯示在這裡..."
	            )

	            status_output = gr.Textbox(
	                label="📊 處理狀態",
	                lines=2
	            )

	    # Usage examples.
	    with gr.Accordion("📖 使用範例與 API", open=False):
	        gr.Markdown("""
	## 🔗 Gradio Client API 使用

	```python
	from gradio_client import Client

	client = Client("sheep52031/mediatek-asr-test")
	result = client.predict("audio_file.wav", api_name="/predict")

	transcript = result[0] # 識別文字
	performance = result[1] # 性能指標
	status = result[2] # 處理狀態
	```

	## 📊 評估指標
	- RTF < 1.0: 實時處理能力
	- 準確度: 台灣國語識別正確率
	- 處理時間: 總耗時包含模型載入
	""")

	    # Event binding. The endpoint is named explicitly so that it matches the
	    # "/predict" name used in the client example above; without api_name
	    # the endpoint is not registered under that name.
	    submit_btn.click(
	        transcribe_audio,
	        inputs=[audio_input],
	        outputs=[transcript_output, performance_output, status_output],
	        api_name="predict"
	    )

	# Start the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()