"""AgricFusion voice-to-voice API.

Accepts a recorded question from a farmer, asks Gemini to transcribe it and
produce advice as JSON, converts the advice to speech with gTTS, and returns
the transcription, advice text and base64-encoded MP3 in a single response.
"""

import base64
import json
import logging
import os
import re
import shutil
import time
import uuid
from io import BytesIO
from pathlib import Path
from typing import Optional

import google.generativeai as genai
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from gtts import gTTS

logger = logging.getLogger(__name__)

# --- SETUP ---
API_KEY = os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    raise RuntimeError("GEMINI_API_KEY not set in Secrets")

genai.configure(api_key=API_KEY)

# Single source of truth for the model used by both endpoints.
MODEL_NAME = "gemini-3-flash-preview"

app = FastAPI(
    title="AgricFusion API v3 - Gemini 3",
    description="Voice-to-voice agric advisor powered by Gemini 3 Flash",
    version="3.0",
)

# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; consider restricting allow_origins before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

UPLOAD_DIR = Path("/tmp/agric_voice")
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

# Extension used when the uploaded filename is missing or has no extension.
DEFAULT_AUDIO_SUFFIX = ".webm"

# Upper bound on the client-supplied language hint that is appended to the
# prompt; keeps untrusted form input from bloating or hijacking the prompt.
MAX_LANGUAGE_HINT_CHARS = 50

# Maps the language code reported by the model to the voice passed to gTTS.
# Yoruba, Hausa and Igbo are mapped to the English voice as a fallback;
# Swahili uses its own voice.
GTTS_LANG_MAP = {
    "en": "en",
    "yo": "en",
    "ha": "en",
    "ig": "en",
    "sw": "sw",
}

SYSTEM_PROMPT = """
You are AgricFusion AI, an expert in tropical agriculture.
Task:
1. Detect spoken language (English, Yoruba, Hausa, Igbo, Swahili).
2. Transcribe exactly.
3. Give practical, organic advice.
4. Respond ONLY in the detected language.

Return STRICT JSON:
{
  "transcription": "...",
  "advice": "...",
  "lang_code": "en|yo|ha|ig|sw"
}
"""

_FENCE_RE = re.compile(r"```json\s*|```")


def clean_json_response(text):
    """Strip markdown code fences and isolate the JSON object in ``text``.

    Fences (```` ```json ```` / ```` ``` ````) are removed first. If the
    remaining text is not a JSON array and contains a ``{ ... }`` span, only
    the span from the first ``{`` to the last ``}`` is returned, so prose the
    model adds before or after the object does not break ``json.loads``.

    Args:
        text: Raw model output; ``None`` is treated as an empty string.

    Returns:
        The cleaned string (not parsed, and not guaranteed to be valid JSON).
    """
    cleaned = _FENCE_RE.sub("", text or "").strip()
    if cleaned.startswith("["):
        return cleaned
    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start != -1 and end > start:
        cleaned = cleaned[start:end + 1]
    return cleaned


def wait_for_gemini_file(file_obj, max_wait=60, poll_interval=2):
    """Poll an uploaded Gemini file until it leaves the PROCESSING state.

    Args:
        file_obj: File handle returned by ``genai.upload_file``.
        max_wait: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The most recently fetched file handle, whose state is ``ACTIVE``.

    Raises:
        RuntimeError: If the file is not ``ACTIVE`` once polling stops
            (processing failed or ``max_wait`` elapsed).
    """
    waited = 0
    while file_obj.state.name == "PROCESSING" and waited < max_wait:
        time.sleep(poll_interval)
        waited += poll_interval
        file_obj = genai.get_file(file_obj.name)

    if file_obj.state.name != "ACTIVE":
        raise RuntimeError("Gemini audio processing failed")
    return file_obj


@app.get("/")
def health_check():
    """Report that the service is up and which model it is configured for."""
    return {"status": "online", "model": MODEL_NAME}


def _build_prompt(language: Optional[str]) -> str:
    """Return the system prompt, plus a trimmed language hint if one was sent."""
    hint = (language or "").strip()[:MAX_LANGUAGE_HINT_CHARS]
    if not hint:
        return SYSTEM_PROMPT
    return SYSTEM_PROMPT + f" Context: Farmer mentioned {hint}."


def _synthesize_speech(advice: str, lang_code: str) -> str:
    """Render ``advice`` to MP3 with gTTS and return it base64-encoded."""
    tts_lang = GTTS_LANG_MAP.get(lang_code, "en")
    audio_io = BytesIO()
    gTTS(text=advice, lang=tts_lang, slow=False).write_to_fp(audio_io)
    return base64.b64encode(audio_io.getvalue()).decode("utf-8")


def _cleanup(local_path: Path, gemini_file) -> None:
    """Best-effort removal of the temp file and the uploaded Gemini file.

    Failures are logged rather than raised so that cleanup can never replace
    the response already produced for the request.
    """
    try:
        os.remove(local_path)
    except FileNotFoundError:
        pass
    except OSError:
        logger.warning("Could not remove temp file %s", local_path, exc_info=True)

    if gemini_file:
        try:
            genai.delete_file(gemini_file.name)
        except Exception:
            logger.warning(
                "Could not delete Gemini file %s", gemini_file.name, exc_info=True
            )


def _run_voice_pipeline(audio_stream, local_path: Path, language: Optional[str]):
    """Run the blocking save -> Gemini -> gTTS pipeline for one request.

    Intended to run in a worker thread: every step here (file copy, Gemini
    upload/polling/generation, gTTS synthesis) blocks.

    Returns:
        The success payload as a dict, or a ``JSONResponse`` with status 500
        describing the failure.
    """
    gemini_file = None
    try:
        # 1. Save local file
        with local_path.open("wb") as buffer:
            shutil.copyfileobj(audio_stream, buffer)

        # 2. Upload to Gemini. Keep the upload handle before waiting so the
        #    remote file is still deleted if processing fails.
        gemini_file = genai.upload_file(path=str(local_path))
        gemini_file = wait_for_gemini_file(gemini_file)

        # 3. Initialize Gemini 3 Flash (Fastest for Voice)
        model = genai.GenerativeModel(
            model_name=MODEL_NAME,
            generation_config={
                "response_mime_type": "application/json",
                "temperature": 1.0,
            },
        )
        response = model.generate_content([gemini_file, _build_prompt(language)])

        # 4. Robust JSON parsing. Read response.text once; the payload must
        #    be a JSON object for the field lookups below.
        raw_text = response.text
        try:
            res_data = json.loads(clean_json_response(raw_text))
        except ValueError:
            res_data = None
        if not isinstance(res_data, dict):
            return JSONResponse(
                status_code=500,
                content={"error": "JSON Parse Error", "raw": raw_text},
            )

        # gTTS rejects empty text, so fall back when advice is missing/blank.
        advice = res_data.get("advice")
        if not isinstance(advice, str) or not advice.strip():
            advice = "No advice generated."

        # The model may return null or a non-string here; default to English.
        lang_code = res_data.get("lang_code")
        if isinstance(lang_code, str) and lang_code.strip():
            lang_code = lang_code.strip().lower()
        else:
            lang_code = "en"

        # 5. TTS Generation
        audio_base64 = _synthesize_speech(advice, lang_code)

        return {
            "success": True,
            "transcription": res_data.get("transcription"),
            "advice": advice,
            "detected_lang": lang_code,
            "audio_base64": audio_base64,
            "mime": "audio/mpeg",
        }

    except Exception as exc:
        # Top-level boundary for the request: log the traceback and report
        # the failure in the same JSON shape clients already handle.
        logger.exception("Voice pipeline failed")
        return JSONResponse(
            status_code=500,
            content={"success": False, "error": str(exc)},
        )

    finally:
        _cleanup(local_path, gemini_file)


@app.post("/agric-voice")
async def process_voice_to_voice(
    audio: UploadFile = File(...),
    language: str = Form(None),
):
    """Turn a spoken farming question into spoken advice.

    Args:
        audio: Uploaded recording of the farmer's question.
        language: Optional free-text language hint appended to the prompt.

    Returns:
        On success, a dict with ``success``, ``transcription``, ``advice``,
        ``detected_lang``, ``audio_base64`` (MP3) and ``mime``. On failure, a
        ``JSONResponse`` with status 500 and an ``error`` field.
    """
    file_id = str(uuid.uuid4())
    # Fall back to the default extension when the filename is missing or has
    # no extension, so the saved file always carries an audio suffix.
    suffix = Path(audio.filename).suffix if audio.filename else ""
    local_path = UPLOAD_DIR / f"{file_id}{suffix or DEFAULT_AUDIO_SUFFIX}"

    # The pipeline is entirely blocking (sleep-based polling, network calls,
    # TTS); run it off the event loop so other requests keep being served.
    return await run_in_threadpool(
        _run_voice_pipeline, audio.file, local_path, language
    )