Update app.py
app.py
CHANGED
--- a/app.py
+++ b/app.py
@@ -1,81 +1,100 @@
-…
-…
-…
-…
+from flask import Flask, request, jsonify, send_file, render_template_string
+import os, io, uuid, requests, json, logging, torch
+import numpy as np
+from transformers import pipeline
+from gtts import gTTS
+from scipy.io import wavfile
+
+# Setup
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = Flask(__name__)
+OPENROUTER_API_KEY = "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"
+MODEL_ID = "meta-llama/llama-3.1-405b-instruct:free"
+
+# Storage & State
+latest_ai_text = "No interaction yet."
 latest_user_text = ""
+trigger_ready = False  # Flag for remote POST command
+
+# Load Whisper STT
+device = "cuda" if torch.cuda.is_available() else "cpu"
+stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device)
+
+def ask_llama(text):
+    global latest_ai_text
+    if not text.strip(): return "..."
+    try:
+        headers = {
+            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+            "HTTP-Referer": "https://huggingface.co/spaces",
+            "Content-Type": "application/json"
+        }
+        payload = {
+            "model": MODEL_ID,
+            "messages": [
+                {"role": "system", "content": "You are SIR(study intelligence robot). Max 10 words. Made by Aayush Parajuli and His team. only reply in text and numbers strictly forbid using any symbols or emojis"},
+                {"role": "user", "content": text}
+            ]
+        }
+        r = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, data=json.dumps(payload), timeout=15)
+        ans = r.json()['choices'][0]['message']['content'].strip()
+        latest_ai_text = ans
+        return ans
+    except Exception as e:
+        logger.error(f"OpenRouter Error: {e}")
+        return "AI response unavailable."
 
-#…
+# --- REMOTE TRIGGER ENDPOINTS ---
 
 @app.route('/trigger_record', methods=['POST'])
 def trigger_record():
+    """External devices (ESP32/Postman) call this to start the web recording."""
     global trigger_ready
     trigger_ready = True
-    return jsonify({"status": "signal_sent"})
+    return jsonify({"status": "signal_sent", "message": "Recording will start on homepage"})
 
 @app.route('/check_trigger')
 def check_trigger():
+    """Homepage polls this to see if it should start recording."""
     global trigger_ready
     if trigger_ready:
-        trigger_ready = False
+        trigger_ready = False
         return jsonify({"trigger": True})
     return jsonify({"trigger": False})
 
-
-def process_audio():
-    global latest_user_text  # Track what we said
-    try:
-        audio_file = io.BytesIO(request.data)
-        samplerate, data = wavfile.read(audio_file)
-        if data.dtype != np.float32:
-            data = data.astype(np.float32) / 32768.0
-
-        stt_result = stt_pipeline({"sampling_rate": samplerate, "raw": data})
-        user_text = stt_result.get('text', '').strip() or "..."
-        latest_user_text = user_text  # Update global
-
-        ai_reply = ask_llama(user_text)
-
-        file_id = str(uuid.uuid4())
-        path = f"/tmp/{file_id}.mp3"
-        gTTS(text=ai_reply, lang='en').save(path)
-
-        return jsonify({
-            "audio_url": f"/get_audio/{file_id}",
-            "user_text": user_text,
-            "ai_text": ai_reply
-        })
-    except Exception as e:
-        logger.error(e)
-        return jsonify({"error": "Failed"}), 500
+# --- CORE LOGIC ---
 
-# Homepage
 @app.route('/')
 def index():
     return render_template_string("""
 <!DOCTYPE html>
 <html>
 <head>
-    <title>…
+    <title>Voice AI Assistant</title>
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <style>
-        body { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 100vh; background: #0a0a0a; color: white; font-family: sans-serif; }
-        #btn { width: …
-        #…
-        #…
+        body { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 100vh; background: #0a0a0a; color: white; font-family: 'Segoe UI', sans-serif; margin: 0; padding: 0; }
+        #btn { width: 180px; height: 180px; border-radius: 50%; border: 6px solid #333; background: #ff3b30; color: white; font-size: 20px; font-weight: bold; cursor: pointer; transition: 0.3s; box-shadow: 0 0 30px rgba(255, 59, 48, 0.3); z-index: 10; }
+        #status { margin-top: 20px; font-family: monospace; color: #ffcc00; font-size: 1.2rem; }
+        #transcript { margin-top: 20px; font-size: 1.4rem; color: #00ffcc; text-align: center; max-width: 80%; }
     </style>
 </head>
 <body>
-    <div id="status">IDLE</div>
     <button id="btn">SIR ACTIVE</button>
+    <div id="status">READY / WAITING FOR TRIGGER</div>
+    <div id="transcript"></div>
 
 <script>
 let audioContext, processor, input, stream, recBuffer = [];
+const btn = document.getElementById('btn');
 const status = document.getElementById('status');
 const transcript = document.getElementById('transcript');
 
 async function startRecording() {
     recBuffer = [];
+    transcript.innerText = "";
     try {
         stream = await navigator.mediaDevices.getUserMedia({ audio: true });
         audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
@@ -85,16 +104,19 @@ def index():
         input.connect(processor);
         processor.connect(audioContext.destination);
 
-        status.innerText = "LISTENING (…
+        status.innerText = "LISTENING (4 SECONDS)...";
+        btn.style.background = "#34c759"; // Green while recording
 
-        //…
+        // Automatically stop after 4 seconds
        setTimeout(stopRecording, 4000);
-    } catch (e) {…
+    } catch (e) { alert("Microphone access denied."); }
 }
 
 function stopRecording() {
     if (!stream || status.innerText === "PROCESSING...") return;
     status.innerText = "PROCESSING...";
+    btn.style.background = "#ff3b30";
+
     input.disconnect(); processor.disconnect();
     stream.getTracks().forEach(t => t.stop());
 
@@ -104,22 +126,24 @@ def index():
     fetch('/process_audio_full', { method: 'POST', body: wavBlob })
     .then(r => r.json())
     .then(data => {
+        // SHOW WHAT WE SAID
         transcript.innerText = "YOU SAID: " + data.user_text;
+
         if(data.audio_url) {
             const audio = new Audio(data.audio_url);
             audio.play();
-            status.innerText = "REPLYING...";
-            audio.onended = () => { status.innerText = "…
+            status.innerText = "SIR REPLYING...";
+            audio.onended = () => { status.innerText = "READY / WAITING FOR TRIGGER"; };
         }
-    });
+    }).catch(() => { status.innerText = "ERROR"; });
 }
 
-// Helper functions (mergeBuffers and encodeWAV same as your original)
 function mergeBuffers(buffer) {
     let length = buffer.length * 4096, result = new Float32Array(length), offset = 0;
     for (let i = 0; i < buffer.length; i++) { result.set(buffer[i], offset); offset += buffer[i].length; }
     return result;
 }
+
 function encodeWAV(samples) {
     let buffer = new ArrayBuffer(44 + samples.length * 2), view = new DataView(buffer);
     const writeStr = (s, o) => { for (let i=0; i<s.length; i++) view.setUint8(o+i, s.charCodeAt(i)); };
@@ -138,18 +162,59 @@ def index():
     return new Blob([view], { type: 'audio/wav' });
 }
 
-// POLL…
+// POLL FOR EXTERNAL TRIGGER
 setInterval(() => {
-    if (status.innerText…
+    if (status.innerText.includes("READY")) {
         fetch('/check_trigger')
         .then(r => r.json())
         .then(data => {
             if (data.trigger) startRecording();
         });
     }
-}, 1000);
+}, 1000);
 
+// Initialize audio on first click (Browser requirement)
+btn.onclick = () => { status.innerText = "SIR READY & ACTIVE"; };
 </script>
 </body>
 </html>
-""")
+""")
+
+@app.route('/process_audio_full', methods=['POST'])
+def process_audio():
+    global latest_user_text
+    try:
+        audio_file = io.BytesIO(request.data)
+        samplerate, data = wavfile.read(audio_file)
+        if data.dtype != np.float32:
+            data = data.astype(np.float32) / 32768.0
+
+        stt_result = stt_pipeline({"sampling_rate": samplerate, "raw": data})
+        user_text = stt_result.get('text', '').strip() or "..."
+        latest_user_text = user_text  # Store what we said
+
+        ai_reply = ask_llama(user_text)
+
+        file_id = str(uuid.uuid4())
+        path = f"/tmp/{file_id}.mp3"
+        gTTS(text=ai_reply, lang='en').save(path)
+
+        return jsonify({
+            "audio_url": f"/get_audio/{file_id}",
+            "user_text": user_text,
+            "ai_text": ai_reply
+        })
+    except Exception as e:
+        logger.error(e)
+        return jsonify({"error": "Failed to process audio"}), 500
+
+@app.route('/get_audio/<fid>')
+def get_audio(fid):
+    return send_file(f"/tmp/{fid}.mp3")
+
+@app.route('/latest_text')
+def latest():
+    return latest_ai_text
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860)
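A note on how the new trigger flow fits together: an external device POSTs to /trigger_record, and any open homepage tab polls /check_trigger once per second, starting a 4-second recording when it sees the flag. The microphone capture itself happens in the page's JavaScript, so a browser tab must already be open. A minimal sketch of the device side in Python, assuming a hypothetical Space base URL:

import time
import requests

SPACE_URL = "https://example-space.hf.space"  # hypothetical base URL, not from the commit

# Fire the trigger; the open homepage will pick it up within one polling cycle.
r = requests.post(f"{SPACE_URL}/trigger_record", timeout=10)
print(r.json())  # {"status": "signal_sent", "message": "Recording will start on homepage"}

# Optionally wait for recording (4 s) plus STT, LLM, and TTS, then read back
# the assistant's last reply as plain text via /latest_text.
time.sleep(10)
print(requests.get(f"{SPACE_URL}/latest_text", timeout=10).text)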
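The /process_audio_full route also works without the browser UI: it accepts raw WAV bytes as the request body and returns JSON with user_text, ai_text, and an audio_url pointing at the generated MP3. A sketch of a headless client, assuming a local 16 kHz mono test.wav like the page's recorder produces:

import requests

SPACE_URL = "https://example-space.hf.space"  # hypothetical base URL, not from the commit

with open("test.wav", "rb") as f:  # 16 kHz mono WAV, matching what the page sends
    wav_bytes = f.read()

resp = requests.post(f"{SPACE_URL}/process_audio_full", data=wav_bytes, timeout=60)
result = resp.json()
print("You said:", result["user_text"])
print("SIR said:", result["ai_text"])

# The reply audio is served from /get_audio/<fid>; save it locally.
mp3 = requests.get(f"{SPACE_URL}{result['audio_url']}", timeout=30)
with open("reply.mp3", "wb") as f:
    f.write(mp3.content)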
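One robustness caveat in process_audio: scipy's wavfile.read returns a 2-D array for stereo files, which the Whisper pipeline will not accept as "raw", and the commit's cast divides by 32768 whatever the source dtype, which is only correct for int16. The browser client always sends mono 16 kHz int16, so this works as committed; if other clients post WAVs, a more defensive normalization might look like this (a sketch with a hypothetical helper name, not part of the commit):

import numpy as np

def normalize_audio(samplerate, data):
    """Coerce whatever wavfile.read returned into mono float32 for Whisper."""
    if data.ndim > 1:  # stereo or multi-channel: average down to mono
        data = data.mean(axis=1)
    if data.dtype == np.int16:
        data = data.astype(np.float32) / 32768.0
    elif data.dtype != np.float32:  # e.g. int32 or float64 sources
        data = data.astype(np.float32)
        peak = np.abs(data).max() or 1.0
        data = data / peak
    return samplerate, data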
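Finally, the commit hard-codes a live OpenRouter key at the top of app.py. Anything pushed to a public Space is effectively published, so a key committed this way should be treated as leaked and rotated. A sketch of the usual alternative, assuming the key is stored as a Space secret (the environment variable name here is our choice, not part of the commit):

import os

# Read the key from the environment instead of committing it; on Hugging Face
# Spaces this would be set under Settings -> Variables and secrets.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
if not OPENROUTER_API_KEY:
    raise RuntimeError("OPENROUTER_API_KEY environment variable is not set")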