Spaces:

Emeritus-21
/

local-lang-speechtotext

Sleeping

App Files Files Community

local-lang-speechtotext / app.py

Emeritus-21

Update app.py

ee164c1 verified about 2 months ago

raw

history blame contribute delete

4.4 kB

	import os
	import json
	import base64
	import time
	import shutil
	import uuid
	import re
	from pathlib import Path
	from io import BytesIO

	from fastapi import FastAPI, UploadFile, File, Form, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	import google.generativeai as genai
	from gtts import gTTS

	# --- SETUP ---
	# The Gemini key is read once at import time; the module refuses to load
	# without it so a misconfigured deployment fails immediately.
	API_KEY = os.getenv("GEMINI_API_KEY")
	if not API_KEY:
	    raise RuntimeError("GEMINI_API_KEY not set in Secrets")

	# Register the key with the google.generativeai client used below.
	genai.configure(api_key=API_KEY)

	# ASGI application object; the metadata below is what FastAPI is given
	# for the service title, description and version.
	app = FastAPI(
	    version="3.0",
	    title="AgricFusion API v3 - Gemini 3",
	    description="Voice-to-voice agric advisor powered by Gemini 3 Flash",
	)

	# CORS: the API is open to any origin, method and header.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    # Credentials must stay off while origins are wildcarded: the CORS
	    # rules forbid combining "*" with credentialed requests, and this
	    # service does not read cookies or auth headers anyway.
	    # NOTE(review): if credentialed calls are ever needed, list explicit
	    # origins in allow_origins before enabling this.
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Scratch directory for uploaded audio; created at import time if absent.
	UPLOAD_DIR = Path("/tmp", "agric_voice")
	UPLOAD_DIR.mkdir(exist_ok=True)

	# Maps the language code reported by the model to the gTTS voice used for
	# playback. Yoruba, Hausa and Igbo are spoken with the English voice as a
	# fallback (the original note says gTTS support for them is limited);
	# Swahili uses its own voice.
	GTTS_LANG_MAP = dict.fromkeys(("en", "yo", "ha", "ig"), "en")
	GTTS_LANG_MAP["sw"] = "sw"

	# Instruction text sent to the model together with the uploaded audio.
	# The lang_code alternatives are separated by plain "|" characters: a
	# backslash before "|" is an invalid escape in a normal string literal and
	# would be sent to the model verbatim.
	SYSTEM_PROMPT = """
	You are AgricFusion AI, an expert in tropical agriculture.
	Task:
	1. Detect spoken language (English, Yoruba, Hausa, Igbo, Swahili).
	2. Transcribe exactly.
	3. Give practical, organic advice.
	4. Respond ONLY in the detected language.

	Return STRICT JSON:
	{
	"transcription": "...",
	"advice": "...",
	"lang_code": "en|yo|ha|ig|sw"
	}
	"""

	def clean_json_response(text):
	    """Strip a wrapping Markdown code fence from a model reply.

	    Removes a leading fence marker (three backticks, optionally followed by
	    a language tag such as ``json``) and a trailing fence marker, then trims
	    surrounding whitespace. Backticks inside the payload are left untouched
	    so string values are not corrupted.

	    Args:
	        text: Raw reply text.

	    Returns:
	        The reply with any wrapping fence removed, ready for ``json.loads``.
	    """
	    cleaned = text.strip()
	    # Opening fence, e.g. "```json\n" or "```\n".
	    cleaned = re.sub(r"^```[A-Za-z]*\s*", "", cleaned)
	    # Closing fence at the very end of the reply.
	    cleaned = re.sub(r"\s*```$", "", cleaned)
	    return cleaned.strip()

	def wait_for_gemini_file(file_obj, max_wait=60, poll_interval=2):
	    """Poll an uploaded Gemini file until it leaves the PROCESSING state.

	    Args:
	        file_obj: File handle returned by ``genai.upload_file``; its
	            ``state.name`` and ``name`` attributes are read.
	        max_wait: Maximum number of seconds to keep polling.
	        poll_interval: Seconds to sleep between polls.

	    Returns:
	        The most recently fetched file handle, whose state is ACTIVE.

	    Raises:
	        TimeoutError: The file was still PROCESSING when ``max_wait`` elapsed.
	        RuntimeError: The file ended in any other non-ACTIVE state.
	    """
	    # A monotonic deadline also accounts for time spent inside get_file().
	    deadline = time.monotonic() + max_wait
	    while file_obj.state.name == "PROCESSING" and time.monotonic() < deadline:
	        time.sleep(poll_interval)
	        file_obj = genai.get_file(file_obj.name)

	    state = file_obj.state.name
	    if state == "PROCESSING":
	        raise TimeoutError(
	            f"Gemini audio processing failed: still processing after {max_wait}s"
	        )
	    if state != "ACTIVE":
	        raise RuntimeError(f"Gemini audio processing failed (state: {state})")
	    return file_obj

	@app.get("/")
	def health_check():
	    """Liveness probe: report that the service is up and which model it targets."""
	    return dict(status="online", model="gemini-3-flash-preview")

	def _cleanup_voice_artifacts(local_path, gemini_file):
	    """Best-effort removal of the temp upload and the remote Gemini file."""
	    try:
	        local_path.unlink()
	    except OSError:
	        # Already gone or not removable; nothing useful to do for the caller.
	        pass
	    if gemini_file is not None:
	        try:
	            genai.delete_file(gemini_file.name)
	        except Exception:
	            # A failed remote delete must not replace the response that was
	            # already computed for the client.
	            pass


	def _run_voice_pipeline(audio, language, local_path):
	    """Blocking pipeline: save upload, query Gemini, synthesize the reply.

	    Returns either the success payload (dict) or a 500 ``JSONResponse``.
	    """
	    gemini_file = None
	    try:
	        # 1. Save local file
	        with local_path.open("wb") as buffer:
	            shutil.copyfileobj(audio.file, buffer)

	        # 2. Upload to Gemini and wait until the file is usable
	        gemini_file = genai.upload_file(path=str(local_path))
	        wait_for_gemini_file(gemini_file)

	        # 3. Ask the model for transcription + advice as JSON
	        model = genai.GenerativeModel(
	            model_name="gemini-3-flash-preview",
	            generation_config={
	                "response_mime_type": "application/json",
	                "temperature": 1.0,
	            },
	        )
	        lang_hint = f" Context: Farmer mentioned {language}." if language else ""
	        response = model.generate_content([gemini_file, SYSTEM_PROMPT + lang_hint])

	        # 4. Parse the reply; read .text once so the error path cannot
	        #    fail a second time while building its own payload.
	        raw_text = response.text
	        try:
	            res_data = json.loads(clean_json_response(raw_text))
	        except ValueError:
	            res_data = None
	        if not isinstance(res_data, dict):
	            return JSONResponse(
	                status_code=500,
	                content={"error": "JSON Parse Error", "raw": raw_text},
	            )

	        # Null or empty fields fall back to the defaults instead of crashing.
	        advice = str(res_data.get("advice") or "No advice generated.")
	        lang_code = str(res_data.get("lang_code") or "en").strip().lower()

	        # 5. TTS generation (unknown codes use the English voice)
	        tts_lang = GTTS_LANG_MAP.get(lang_code, "en")
	        tts = gTTS(text=advice, lang=tts_lang, slow=False)
	        audio_io = BytesIO()
	        tts.write_to_fp(audio_io)
	        audio_base64 = base64.b64encode(audio_io.getvalue()).decode("utf-8")

	        return {
	            "success": True,
	            "transcription": res_data.get("transcription"),
	            "advice": advice,
	            "detected_lang": lang_code,
	            "audio_base64": audio_base64,
	            "mime": "audio/mpeg",
	        }

	    except Exception as exc:
	        # Top-level boundary: report any pipeline failure as a 500 payload.
	        return JSONResponse(
	            status_code=500,
	            content={"success": False, "error": str(exc)},
	        )

	    finally:
	        _cleanup_voice_artifacts(local_path, gemini_file)


	@app.post("/agric-voice")
	async def process_voice_to_voice(
	    audio: UploadFile = File(...),
	    language: str = Form(None)
	):
	    """Voice-to-voice endpoint: audio question in, spoken advice out.

	    Args:
	        audio: Uploaded audio recording (multipart form file).
	        language: Optional language hint appended to the model prompt.

	    Returns:
	        On success, a dict with ``success``, ``transcription``, ``advice``,
	        ``detected_lang``, ``audio_base64`` (MP3 bytes, base64-encoded) and
	        ``mime``. On failure, a ``JSONResponse`` with status 500.
	    """
	    import asyncio

	    file_id = str(uuid.uuid4())
	    # Keep only the extension of the client filename; default to .webm when
	    # the name is missing or has no extension.
	    suffix = Path(audio.filename or "").suffix or ".webm"
	    local_path = UPLOAD_DIR / f"{file_id}{suffix}"

	    # Every step of the pipeline blocks (disk, network, sleep-based polling),
	    # so run it in a worker thread to keep the event loop responsive.
	    loop = asyncio.get_running_loop()
	    return await loop.run_in_executor(
	        None, _run_voice_pipeline, audio, language, local_path
	    )