Update app.py
app.py
CHANGED
@@ -1,162 +1,155 @@
-import io, uuid, logging, torch, requests, json
-import numpy as np
-from transformers import pipeline
-from gtts import gTTS
-from scipy.io import wavfile

-device = "cuda" if torch.cuda.is_available() else "cpu"
-stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device)

-# ------------- AI FUNCTION -------------
-def ask_llama(text):
-    global latest_ai_text
-    if not text.strip(): return "..."
-    try:
-        headers = {
-            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-            "Content-Type": "application/json"
-        }
-        payload = {
-            "model": MODEL_ID,
-            "messages": [
-                {"role": "system", "content": "You are SIR(study intelligence robot). Max 10 words. Only text/numbers. No symbols or emojis."},
-                {"role": "user", "content": text}
-            ]
-        }
-        r = requests.post("https://openrouter.ai/api/v1/chat/completions",
-                          headers=headers, data=json.dumps(payload), timeout=15)
-        ans = r.json()['choices'][0]['message']['content'].strip()
-        latest_ai_text = ans
-        return ans
-    except Exception as e:
-        logger.error(f"OpenRouter Error: {e}")
-        return "AI response unavailable."

-# ------------- ROUTES -------------
-@app.route('/')
-def index():
-    return render_template_string("""
-<!DOCTYPE html>
-<html>
-<head>
-    <title>ESP32 AI Recorder</title>
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <style>
-        body { display:flex; flex-direction:column; align-items:center; justify-content:center; min-height:100vh; background:#111; color:white; font-family:sans-serif; }
-        button { width:180px; height:180px; border-radius:50%; border:6px solid #333; background:#ff3b30; color:white; font-size:20px; font-weight:bold; cursor:pointer; box-shadow:0 0 30px rgba(255,59,48,0.3); }
-        button:active { transform:scale(0.9); background:#d12f26; }
-        #status { margin-top:20px; font-family:monospace; color:#ffcc00; }
-        audio { margin-top:20px; width:80%; }
-    </style>
-</head>
-<body>
-    <button id="enableMic">ENABLE MICROPHONE</button>
-    <div id="status">READY</div>
-    <audio id="playback" controls></audio>
-    <script>
-        let mediaRecorder, audioChunks = [], micStream;
-        const status = document.getElementById('status');
-
-        async function startMic() {
-            micStream = await navigator.mediaDevices.getUserMedia({audio:true});
-            mediaRecorder = new MediaRecorder(micStream);
-            mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
-            mediaRecorder.onstop = () => {
-                const audioBlob = new Blob(audioChunks, {type:'audio/webm'});
-                audioChunks = [];
-                fetch('/process_audio_full',{method:'POST',body:audioBlob})
-                    .then(r=>r.json())
-                    .then(data=>{
-                        if(data.audio_url){
-                            const audio = new Audio(data.audio_url);
-                            audio.play();
-                            status.innerText = "AI REPLY PLAYING...";
-                            audio.onended = ()=>status.innerText="READY";
-                        }
-                    }).catch(()=>status.innerText="ERROR");
-            };
-            status.innerText="MIC ENABLED";
-        }
-        document.getElementById('enableMic').addEventListener('click', startMic);
-
-        // Poll server every 500ms for ESP32 trigger
-        setInterval(async ()=>{
-            if(mediaRecorder && mediaRecorder.state==="inactive"){
-                const res = await fetch('/check_record_3s');
-                const data = await res.json();
-                if(data.record){
-                    status.innerText="RECORDING 3s...";
-                    mediaRecorder.start();
-                    setTimeout(()=>mediaRecorder.stop(),3000);
-                }
-            }
-        },500);
-    </script>
-</body>
-</html>
-""")

-# ESP32 triggers 3-second recording
-@app.route('/start_record_3s', methods=['POST'])
-def start_record_3s():
-    global record_trigger_3s
-    record_trigger_3s = True
-    return jsonify({"status":"ok"})

-# Browser polls to check if it should record
-@app.route('/check_record_3s')
-def check_record_3s():
-    global record_trigger_3s
-    if record_trigger_3s:
-        record_trigger_3s = False
-        return jsonify({"record": True})
-    return jsonify({"record": False})

-# Process audio and return AI TTS
 @app.route('/process_audio_full', methods=['POST'])
 def process_audio():
     try:
         audio_file = io.BytesIO(request.data)
         samplerate, data = wavfile.read(audio_file)
         if data.dtype != np.float32:
-            data = data.astype(np.float32)/32768.0

-        stt_result = stt_pipeline({"sampling_rate": samplerate, "raw":data})
-        user_text = stt_result.get('text','').strip() or "..."
         ai_reply = ask_llama(user_text)

         file_id = str(uuid.uuid4())
         path = f"/tmp/{file_id}.mp3"
         gTTS(text=ai_reply, lang='en').save(path)

-        return jsonify({
     except Exception as e:
         logger.error(e)
-        return jsonify({"error":"Failed"})

-#
-@app.route('/
-def
-    return
+# ... (keep existing imports)
+
+# New global state
+trigger_ready = False
+latest_user_text = ""
+
+# ... (keep ask_llama, get_audio, latest functions)
+
+@app.route('/trigger_record', methods=['POST'])
+def trigger_record():
+    global trigger_ready
+    trigger_ready = True
+    return jsonify({"status": "signal_sent"})
+
+@app.route('/check_trigger')
+def check_trigger():
+    global trigger_ready
+    if trigger_ready:
+        trigger_ready = False  # Reset after pick-up
+        return jsonify({"trigger": True})
+    return jsonify({"trigger": False})

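On the device side, triggering a capture is a single HTTP POST to /trigger_record; the page then picks the flag up on its next /check_trigger poll. A minimal sketch of that client, assuming a MicroPython ESP32 with the urequests module and a placeholder Space URL:

# Hypothetical ESP32 client (MicroPython); SPACE_URL is a placeholder.
import urequests

SPACE_URL = "https://your-space.hf.space"  # assumed - replace with the real Space URL

def send_trigger():
    # One POST sets trigger_ready on the server; the browser polls it back down.
    r = urequests.post(SPACE_URL + "/trigger_record")
    print(r.json())  # expected: {"status": "signal_sent"}
    r.close()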
 @app.route('/process_audio_full', methods=['POST'])
 def process_audio():
+    global latest_user_text  # Track what we said
     try:
         audio_file = io.BytesIO(request.data)
         samplerate, data = wavfile.read(audio_file)
         if data.dtype != np.float32:
+            data = data.astype(np.float32) / 32768.0

+        stt_result = stt_pipeline({"sampling_rate": samplerate, "raw": data})
+        user_text = stt_result.get('text', '').strip() or "..."
+        latest_user_text = user_text  # Update global
+
         ai_reply = ask_llama(user_text)

         file_id = str(uuid.uuid4())
         path = f"/tmp/{file_id}.mp3"
         gTTS(text=ai_reply, lang='en').save(path)

+        return jsonify({
+            "audio_url": f"/get_audio/{file_id}",
+            "user_text": user_text,
+            "ai_text": ai_reply
+        })
     except Exception as e:
         logger.error(e)
+        return jsonify({"error": "Failed"}), 500

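For a quick end-to-end check without the browser, /process_audio_full accepts raw WAV bytes in the request body. A sketch of a smoke test, assuming a local 16 kHz mono 16-bit PCM file and the same placeholder Space URL:

# Hypothetical smoke test; file name and URL are placeholders.
import requests

with open("sample_16k_mono.wav", "rb") as f:  # 16 kHz, mono, 16-bit PCM
    resp = requests.post("https://your-space.hf.space/process_audio_full",
                         data=f.read(), timeout=60)
print(resp.json())  # expected keys on success: audio_url, user_text, ai_text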
+# Homepage
+@app.route('/')
+def index():
+    return render_template_string("""
+<!DOCTYPE html>
+<html>
+<head>
+    <title>SIR Voice Assistant</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <style>
+        body { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 100vh; background: #0a0a0a; color: white; font-family: sans-serif; }
+        #btn { width: 150px; height: 150px; border-radius: 50%; border: none; background: #ff3b30; color: white; font-weight: bold; }
+        #transcript { margin-top: 30px; font-size: 1.5rem; color: #00ffcc; text-align: center; padding: 20px; }
+        #status { color: #ffcc00; margin-bottom: 10px; }
+    </style>
+</head>
+<body>
+    <div id="status">IDLE</div>
+    <button id="btn">SIR ACTIVE</button>
+    <div id="transcript">Waiting for voice...</div>
+
+    <script>
+        let audioContext, processor, input, stream, recBuffer = [];
+        const status = document.getElementById('status');
+        const transcript = document.getElementById('transcript');
+
+        async function startRecording() {
+            recBuffer = [];
+            try {
+                stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+                audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
+                input = audioContext.createMediaStreamSource(stream);
+                processor = audioContext.createScriptProcessor(4096, 1, 1);
+                processor.onaudioprocess = (e) => recBuffer.push(new Float32Array(e.inputBuffer.getChannelData(0)));
+                input.connect(processor);
+                processor.connect(audioContext.destination);
+
+                status.innerText = "LISTENING (4s)...";
+
+                // Auto-stop after 4 seconds
+                setTimeout(stopRecording, 4000);
+            } catch (e) { status.innerText = "Mic Error"; }
+        }

+        function stopRecording() {
+            if (!stream || status.innerText === "PROCESSING...") return;
+            status.innerText = "PROCESSING...";
+            input.disconnect(); processor.disconnect();
+            stream.getTracks().forEach(t => t.stop());
+
+            const merged = mergeBuffers(recBuffer);
+            const wavBlob = encodeWAV(merged);
+
+            fetch('/process_audio_full', { method: 'POST', body: wavBlob })
+                .then(r => r.json())
+                .then(data => {
+                    transcript.innerText = "YOU SAID: " + data.user_text;
+                    if (data.audio_url) {
+                        const audio = new Audio(data.audio_url);
+                        audio.play();
+                        status.innerText = "REPLYING...";
+                        audio.onended = () => { status.innerText = "IDLE"; };
+                    }
+                });
+        }
+
+        // Helper functions (mergeBuffers and encodeWAV same as your original)
+        function mergeBuffers(buffer) {
+            let length = buffer.length * 4096, result = new Float32Array(length), offset = 0;
+            for (let i = 0; i < buffer.length; i++) { result.set(buffer[i], offset); offset += buffer[i].length; }
+            return result;
+        }
+        function encodeWAV(samples) {
+            let buffer = new ArrayBuffer(44 + samples.length * 2), view = new DataView(buffer);
+            const writeStr = (s, o) => { for (let i = 0; i < s.length; i++) view.setUint8(o + i, s.charCodeAt(i)); };
+            writeStr('RIFF', 0); view.setUint32(4, 36 + samples.length * 2, true);
+            writeStr('WAVE', 8); writeStr('fmt ', 12);
+            view.setUint32(16, 16, true); view.setUint16(20, 1, true);
+            view.setUint16(22, 1, true); view.setUint32(24, 16000, true);
+            view.setUint32(28, 32000, true); view.setUint16(32, 2, true);
+            view.setUint16(34, 16, true); writeStr('data', 36);
+            view.setUint32(40, samples.length * 2, true);
+            let index = 44;
+            for (let i = 0; i < samples.length; i++, index += 2) {
+                let s = Math.max(-1, Math.min(1, samples[i]));
+                view.setInt16(index, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
+            }
+            return new Blob([view], { type: 'audio/wav' });
+        }
+
+        // POLL SERVER FOR REMOTE TRIGGER
+        setInterval(() => {
+            if (status.innerText === "IDLE") {
+                fetch('/check_trigger')
+                    .then(r => r.json())
+                    .then(data => {
+                        if (data.trigger) startRecording();
+                    });
+            }
+        }, 1000); // Check every second
+    </script>
+</body>
+</html>
+""")
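The encodeWAV helper writes a standard 44-byte RIFF header for 16 kHz mono 16-bit PCM, which is exactly the format wavfile.read parses on the server. A small offline sanity check of a captured blob, assuming it was saved to disk under a placeholder name:

# Hypothetical sanity check; "captured_blob.wav" is a placeholder file name.
import numpy as np
from scipy.io import wavfile

samplerate, data = wavfile.read("captured_blob.wav")
assert samplerate == 16000 and data.dtype == np.int16  # matches encodeWAV's header
print(len(data) / samplerate, "seconds captured")  # roughly 4 s per recording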