HexaGrim committed
Commit a2f0c46 · verified · 1 parent: 47bb31e

Update app.py

Files changed (1)
  1. app.py +132 -139
app.py CHANGED
@@ -1,162 +1,155 @@
- from flask import Flask, request, jsonify, send_file, render_template_string
- import io, uuid, logging, torch, requests, json
- import numpy as np
- from transformers import pipeline
- from gtts import gTTS
- from scipy.io import wavfile
-
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- app = Flask(__name__)
-
- # ------------- HARD-CODED API KEY -------------
- OPENROUTER_API_KEY = "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"
- MODEL_ID = "meta-llama/llama-3.1-405b-instruct:free"
-
- latest_ai_text = "No interaction yet."
- record_trigger_3s = False
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device)
-
- # ------------- AI FUNCTION -------------
- def ask_llama(text):
-     global latest_ai_text
-     if not text.strip(): return "..."
-     try:
-         headers = {
-             "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-             "Content-Type": "application/json"
-         }
-         payload = {
-             "model": MODEL_ID,
-             "messages": [
-                 {"role": "system", "content": "You are SIR(study intelligence robot). Max 10 words. Only text/numbers. No symbols or emojis."},
-                 {"role": "user", "content": text}
-             ]
-         }
-         r = requests.post("https://openrouter.ai/api/v1/chat/completions",
-                           headers=headers, data=json.dumps(payload), timeout=15)
-         ans = r.json()['choices'][0]['message']['content'].strip()
-         latest_ai_text = ans
-         return ans
-     except Exception as e:
-         logger.error(f"OpenRouter Error: {e}")
-         return "AI response unavailable."
-
- # ------------- ROUTES -------------
-
- @app.route('/')
- def index():
-     return render_template_string("""
-     <!DOCTYPE html>
-     <html>
-     <head>
-         <title>ESP32 AI Recorder</title>
-         <meta name="viewport" content="width=device-width, initial-scale=1.0">
-         <style>
-             body { display:flex; flex-direction:column; align-items:center; justify-content:center; min-height:100vh; background:#111; color:white; font-family:sans-serif; }
-             button { width:180px; height:180px; border-radius:50%; border:6px solid #333; background:#ff3b30; color:white; font-size:20px; font-weight:bold; cursor:pointer; box-shadow:0 0 30px rgba(255,59,48,0.3); }
-             button:active { transform:scale(0.9); background:#d12f26; }
-             #status { margin-top:20px; font-family:monospace; color:#ffcc00; }
-             audio { margin-top:20px; width:80%; }
-         </style>
-     </head>
-     <body>
-         <button id="enableMic">ENABLE MICROPHONE</button>
-         <div id="status">READY</div>
-         <audio id="playback" controls></audio>
-         <script>
-             let mediaRecorder, audioChunks = [], micStream;
-             const status = document.getElementById('status');
-
-             async function startMic() {
-                 micStream = await navigator.mediaDevices.getUserMedia({audio:true});
-                 mediaRecorder = new MediaRecorder(micStream);
-                 mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
-                 mediaRecorder.onstop = () => {
-                     const audioBlob = new Blob(audioChunks, {type:'audio/webm'});
-                     audioChunks = [];
-                     fetch('/process_audio_full',{method:'POST',body:audioBlob})
-                         .then(r=>r.json())
-                         .then(data=>{
-                             if(data.audio_url){
-                                 const audio = new Audio(data.audio_url);
-                                 audio.play();
-                                 status.innerText = "AI REPLY PLAYING...";
-                                 audio.onended = ()=>status.innerText="READY";
-                             }
-                         }).catch(()=>status.innerText="ERROR");
-                 };
-                 status.innerText="MIC ENABLED";
-             }
-             document.getElementById('enableMic').addEventListener('click', startMic);
-
-             // Poll server every 500ms for ESP32 trigger
-             setInterval(async ()=>{
-                 if(mediaRecorder && mediaRecorder.state==="inactive"){
-                     const res = await fetch('/check_record_3s');
-                     const data = await res.json();
-                     if(data.record){
-                         status.innerText="RECORDING 3s...";
-                         mediaRecorder.start();
-                         setTimeout(()=>mediaRecorder.stop(),3000);
-                     }
-                 }
-             },500);
-         </script>
-     </body>
-     </html>
-     """)
-
- # ESP32 triggers 3-second recording
- @app.route('/start_record_3s', methods=['POST'])
- def start_record_3s():
-     global record_trigger_3s
-     record_trigger_3s = True
-     return jsonify({"status":"ok"})
-
- # Browser polls to check if it should record
- @app.route('/check_record_3s')
- def check_record_3s():
-     global record_trigger_3s
-     if record_trigger_3s:
-         record_trigger_3s = False
-         return jsonify({"record": True})
-     return jsonify({"record": False})
-
- # Process audio and return AI TTS
  @app.route('/process_audio_full', methods=['POST'])
  def process_audio():
      try:
          audio_file = io.BytesIO(request.data)
          samplerate, data = wavfile.read(audio_file)
          if data.dtype != np.float32:
-             data = data.astype(np.float32)/32768.0

-         stt_result = stt_pipeline({"sampling_rate": samplerate, "raw":data})
-         user_text = stt_result.get('text','').strip() or "Listening..."
          ai_reply = ask_llama(user_text)

          file_id = str(uuid.uuid4())
          path = f"/tmp/{file_id}.mp3"
          gTTS(text=ai_reply, lang='en').save(path)

-         return jsonify({"audio_url": f"/get_audio/{file_id}", "user_text": user_text, "ai_text": ai_reply})
      except Exception as e:
          logger.error(e)
-         return jsonify({"error":"Failed to process audio"}),500

- # Serve TTS audio
- @app.route('/get_audio/<fid>')
- def get_audio(fid):
-     return send_file(f"/tmp/{fid}.mp3")
-
- # Latest AI text
- @app.route('/latest_text')
- def latest_text():
-     return latest_ai_text
-
- if __name__=="__main__":
-     app.run(host='0.0.0.0', port=7860)
+ # ... (keep existing imports)
+
+ # New global state
+ trigger_ready = False
+ latest_user_text = ""
+
+ # ... (keep ask_llama, get_audio, latest functions)
+
+ @app.route('/trigger_record', methods=['POST'])
+ def trigger_record():
+     global trigger_ready
+     trigger_ready = True
+     return jsonify({"status": "signal_sent"})
+
+ @app.route('/check_trigger')
+ def check_trigger():
+     global trigger_ready
+     if trigger_ready:
+         trigger_ready = False  # Reset after pick-up
+         return jsonify({"trigger": True})
+     return jsonify({"trigger": False})
+
  @app.route('/process_audio_full', methods=['POST'])
  def process_audio():
+     global latest_user_text  # Track what we said
      try:
          audio_file = io.BytesIO(request.data)
          samplerate, data = wavfile.read(audio_file)
          if data.dtype != np.float32:
+             data = data.astype(np.float32) / 32768.0

+         stt_result = stt_pipeline({"sampling_rate": samplerate, "raw": data})
+         user_text = stt_result.get('text', '').strip() or "..."
+         latest_user_text = user_text  # Update global
+
          ai_reply = ask_llama(user_text)

          file_id = str(uuid.uuid4())
          path = f"/tmp/{file_id}.mp3"
          gTTS(text=ai_reply, lang='en').save(path)

+         return jsonify({
+             "audio_url": f"/get_audio/{file_id}",
+             "user_text": user_text,
+             "ai_text": ai_reply
+         })
      except Exception as e:
          logger.error(e)
+         return jsonify({"error": "Failed"}), 500

+ # Homepage
+ @app.route('/')
+ def index():
+     return render_template_string("""
+     <!DOCTYPE html>
+     <html>
+     <head>
+         <title>SIR Voice Assistant</title>
+         <meta name="viewport" content="width=device-width, initial-scale=1.0">
+         <style>
+             body { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 100vh; background: #0a0a0a; color: white; font-family: sans-serif; }
+             #btn { width: 150px; height: 150px; border-radius: 50%; border: none; background: #ff3b30; color: white; font-weight: bold; }
+             #transcript { margin-top: 30px; font-size: 1.5rem; color: #00ffcc; text-align: center; padding: 20px; }
+             #status { color: #ffcc00; margin-bottom: 10px; }
+         </style>
+     </head>
+     <body>
+         <div id="status">IDLE</div>
+         <button id="btn">SIR ACTIVE</button>
+         <div id="transcript">Waiting for voice...</div>
+
+         <script>
+             let audioContext, processor, input, stream, recBuffer = [];
+             const status = document.getElementById('status');
+             const transcript = document.getElementById('transcript');
+
+             async function startRecording() {
+                 recBuffer = [];
+                 try {
+                     stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+                     audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
+                     input = audioContext.createMediaStreamSource(stream);
+                     processor = audioContext.createScriptProcessor(4096, 1, 1);
+                     processor.onaudioprocess = (e) => recBuffer.push(new Float32Array(e.inputBuffer.getChannelData(0)));
+                     input.connect(processor);
+                     processor.connect(audioContext.destination);
+
+                     status.innerText = "LISTENING (4s)...";
+
+                     // Auto-stop after 4 seconds
+                     setTimeout(stopRecording, 4000);
+                 } catch (e) { status.innerText = "Mic Error"; }
+             }
+
+             function stopRecording() {
+                 if (!stream || status.innerText === "PROCESSING...") return;
+                 status.innerText = "PROCESSING...";
+                 input.disconnect(); processor.disconnect();
+                 stream.getTracks().forEach(t => t.stop());
+
+                 const merged = mergeBuffers(recBuffer);
+                 const wavBlob = encodeWAV(merged);
+
+                 fetch('/process_audio_full', { method: 'POST', body: wavBlob })
+                     .then(r => r.json())
+                     .then(data => {
+                         transcript.innerText = "YOU SAID: " + data.user_text;
+                         if (data.audio_url) {
+                             const audio = new Audio(data.audio_url);
+                             audio.play();
+                             status.innerText = "REPLYING...";
+                             audio.onended = () => { status.innerText = "IDLE"; };
+                         }
+                     });
+             }
+
+             // Helper functions (mergeBuffers and encodeWAV same as your original)
+             function mergeBuffers(buffer) {
+                 let length = buffer.length * 4096, result = new Float32Array(length), offset = 0;
+                 for (let i = 0; i < buffer.length; i++) { result.set(buffer[i], offset); offset += buffer[i].length; }
+                 return result;
+             }
+             function encodeWAV(samples) {
+                 let buffer = new ArrayBuffer(44 + samples.length * 2), view = new DataView(buffer);
+                 const writeStr = (s, o) => { for (let i = 0; i < s.length; i++) view.setUint8(o + i, s.charCodeAt(i)); };
+                 writeStr('RIFF', 0); view.setUint32(4, 36 + samples.length * 2, true);
+                 writeStr('WAVE', 8); writeStr('fmt ', 12);
+                 view.setUint32(16, 16, true); view.setUint16(20, 1, true);
+                 view.setUint16(22, 1, true); view.setUint32(24, 16000, true);
+                 view.setUint32(28, 32000, true); view.setUint16(32, 2, true);
+                 view.setUint16(34, 16, true); writeStr('data', 36);
+                 view.setUint32(40, samples.length * 2, true);
+                 let index = 44;
+                 for (let i = 0; i < samples.length; i++, index += 2) {
+                     let s = Math.max(-1, Math.min(1, samples[i]));
+                     view.setInt16(index, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
+                 }
+                 return new Blob([view], { type: 'audio/wav' });
+             }
+
+             // POLL SERVER FOR REMOTE TRIGGER
+             setInterval(() => {
+                 if (status.innerText === "IDLE") {
+                     fetch('/check_trigger')
+                         .then(r => r.json())
+                         .then(data => {
+                             if (data.trigger) startRecording();
+                         });
+                 }
+             }, 1000); // Check every second
+         </script>
+     </body>
+     </html>
+     """)