HexaGrim committed
Commit a2f0c46 · verified · 1 parent: 47bb31e

Update app.py

Files changed (1)
  1. app.py +132 -139
app.py CHANGED
@@ -1,162 +1,155 @@
- from flask import Flask, request, jsonify, send_file, render_template_string
- import io, uuid, logging, torch, requests, json
- import numpy as np
- from transformers import pipeline
- from gtts import gTTS
- from scipy.io import wavfile
-
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger(__name__)
-
- app = Flask(__name__)
-
- # ------------- HARD-CODED API KEY -------------
- OPENROUTER_API_KEY = "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"
- MODEL_ID = "meta-llama/llama-3.1-405b-instruct:free"
-
- latest_ai_text = "No interaction yet."
- record_trigger_3s = False
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device)
-
- # ------------- AI FUNCTION -------------
- def ask_llama(text):
-     global latest_ai_text
-     if not text.strip(): return "..."
-     try:
-         headers = {
-             "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-             "Content-Type": "application/json"
-         }
-         payload = {
-             "model": MODEL_ID,
-             "messages": [
-                 {"role": "system", "content": "You are SIR(study intelligence robot). Max 10 words. Only text/numbers. No symbols or emojis."},
-                 {"role": "user", "content": text}
-             ]
-         }
-         r = requests.post("https://openrouter.ai/api/v1/chat/completions",
-                           headers=headers, data=json.dumps(payload), timeout=15)
-         ans = r.json()['choices'][0]['message']['content'].strip()
-         latest_ai_text = ans
-         return ans
-     except Exception as e:
-         logger.error(f"OpenRouter Error: {e}")
-         return "AI response unavailable."
-
- # ------------- ROUTES -------------
-
- @app.route('/')
- def index():
-     return render_template_string("""
-     <!DOCTYPE html>
-     <html>
-     <head>
-         <title>ESP32 AI Recorder</title>
-         <meta name="viewport" content="width=device-width, initial-scale=1.0">
-         <style>
-             body { display:flex; flex-direction:column; align-items:center; justify-content:center; min-height:100vh; background:#111; color:white; font-family:sans-serif; }
-             button { width:180px; height:180px; border-radius:50%; border:6px solid #333; background:#ff3b30; color:white; font-size:20px; font-weight:bold; cursor:pointer; box-shadow:0 0 30px rgba(255,59,48,0.3); }
-             button:active { transform:scale(0.9); background:#d12f26; }
-             #status { margin-top:20px; font-family:monospace; color:#ffcc00; }
-             audio { margin-top:20px; width:80%; }
-         </style>
-     </head>
-     <body>
-         <button id="enableMic">ENABLE MICROPHONE</button>
-         <div id="status">READY</div>
-         <audio id="playback" controls></audio>
-         <script>
-             let mediaRecorder, audioChunks = [], micStream;
-             const status = document.getElementById('status');
-
-             async function startMic() {
-                 micStream = await navigator.mediaDevices.getUserMedia({audio:true});
-                 mediaRecorder = new MediaRecorder(micStream);
-                 mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
-                 mediaRecorder.onstop = () => {
-                     const audioBlob = new Blob(audioChunks, {type:'audio/webm'});
-                     audioChunks = [];
-                     fetch('/process_audio_full',{method:'POST',body:audioBlob})
-                         .then(r=>r.json())
-                         .then(data=>{
-                             if(data.audio_url){
-                                 const audio = new Audio(data.audio_url);
-                                 audio.play();
-                                 status.innerText = "AI REPLY PLAYING...";
-                                 audio.onended = ()=>status.innerText="READY";
-                             }
-                         }).catch(()=>status.innerText="ERROR");
-                 };
-                 status.innerText="MIC ENABLED";
-             }
-             document.getElementById('enableMic').addEventListener('click', startMic);
-
-             // Poll server every 500ms for ESP32 trigger
-             setInterval(async ()=>{
-                 if(mediaRecorder && mediaRecorder.state==="inactive"){
-                     const res = await fetch('/check_record_3s');
-                     const data = await res.json();
-                     if(data.record){
-                         status.innerText="RECORDING 3s...";
-                         mediaRecorder.start();
-                         setTimeout(()=>mediaRecorder.stop(),3000);
-                     }
-                 }
-             },500);
-         </script>
-     </body>
-     </html>
-     """)
-
- # ESP32 triggers 3-second recording
- @app.route('/start_record_3s', methods=['POST'])
- def start_record_3s():
-     global record_trigger_3s
-     record_trigger_3s = True
-     return jsonify({"status":"ok"})
-
- # Browser polls to check if it should record
- @app.route('/check_record_3s')
- def check_record_3s():
-     global record_trigger_3s
-     if record_trigger_3s:
-         record_trigger_3s = False
-         return jsonify({"record": True})
-     return jsonify({"record": False})
-
- # Process audio and return AI TTS
  @app.route('/process_audio_full', methods=['POST'])
  def process_audio():
      try:
          audio_file = io.BytesIO(request.data)
          samplerate, data = wavfile.read(audio_file)
          if data.dtype != np.float32:
-             data = data.astype(np.float32)/32768.0

-         stt_result = stt_pipeline({"sampling_rate": samplerate, "raw":data})
-         user_text = stt_result.get('text','').strip() or "Listening..."
          ai_reply = ask_llama(user_text)

          file_id = str(uuid.uuid4())
          path = f"/tmp/{file_id}.mp3"
          gTTS(text=ai_reply, lang='en').save(path)

-         return jsonify({"audio_url": f"/get_audio/{file_id}", "user_text": user_text, "ai_text": ai_reply})
      except Exception as e:
          logger.error(e)
-         return jsonify({"error":"Failed to process audio"}),500

- # Serve TTS audio
- @app.route('/get_audio/<fid>')
- def get_audio(fid):
-     return send_file(f"/tmp/{fid}.mp3")
-
- # Latest AI text
- @app.route('/latest_text')
- def latest_text():
-     return latest_ai_text
-
- if __name__=="__main__":
-     app.run(host='0.0.0.0', port=7860)
+ # ... (keep existing imports)
+
+ # New global state
+ trigger_ready = False
+ latest_user_text = ""
+
+ # ... (keep ask_llama, get_audio, latest functions)
+
+ @app.route('/trigger_record', methods=['POST'])
+ def trigger_record():
+     global trigger_ready
+     trigger_ready = True
+     return jsonify({"status": "signal_sent"})
+
+ @app.route('/check_trigger')
+ def check_trigger():
+     global trigger_ready
+     if trigger_ready:
+         trigger_ready = False  # Reset after pick-up
+         return jsonify({"trigger": True})
+     return jsonify({"trigger": False})
+
  @app.route('/process_audio_full', methods=['POST'])
  def process_audio():
+     global latest_user_text  # Track what we said
      try:
          audio_file = io.BytesIO(request.data)
          samplerate, data = wavfile.read(audio_file)
          if data.dtype != np.float32:
+             data = data.astype(np.float32) / 32768.0

+         stt_result = stt_pipeline({"sampling_rate": samplerate, "raw": data})
+         user_text = stt_result.get('text', '').strip() or "..."
+         latest_user_text = user_text  # Update global
+
          ai_reply = ask_llama(user_text)

          file_id = str(uuid.uuid4())
          path = f"/tmp/{file_id}.mp3"
          gTTS(text=ai_reply, lang='en').save(path)

+         return jsonify({
+             "audio_url": f"/get_audio/{file_id}",
+             "user_text": user_text,
+             "ai_text": ai_reply
+         })
      except Exception as e:
          logger.error(e)
+         return jsonify({"error": "Failed"}), 500

+ # Homepage
+ @app.route('/')
+ def index():
+     return render_template_string("""
+     <!DOCTYPE html>
+     <html>
+     <head>
+         <title>SIR Voice Assistant</title>
+         <meta name="viewport" content="width=device-width, initial-scale=1.0">
+         <style>
+             body { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 100vh; background: #0a0a0a; color: white; font-family: sans-serif; }
+             #btn { width: 150px; height: 150px; border-radius: 50%; border: none; background: #ff3b30; color: white; font-weight: bold; }
+             #transcript { margin-top: 30px; font-size: 1.5rem; color: #00ffcc; text-align: center; padding: 20px; }
+             #status { color: #ffcc00; margin-bottom: 10px; }
+         </style>
+     </head>
+     <body>
+         <div id="status">IDLE</div>
+         <button id="btn">SIR ACTIVE</button>
+         <div id="transcript">Waiting for voice...</div>
+
+         <script>
+             let audioContext, processor, input, stream, recBuffer = [];
+             const status = document.getElementById('status');
+             const transcript = document.getElementById('transcript');
+
+             async function startRecording() {
+                 recBuffer = [];
+                 try {
+                     stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+                     audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
+                     input = audioContext.createMediaStreamSource(stream);
+                     processor = audioContext.createScriptProcessor(4096, 1, 1);
+                     processor.onaudioprocess = (e) => recBuffer.push(new Float32Array(e.inputBuffer.getChannelData(0)));
+                     input.connect(processor);
+                     processor.connect(audioContext.destination);
+
+                     status.innerText = "LISTENING (4s)...";
+
+                     // Auto-stop after 4 seconds
+                     setTimeout(stopRecording, 4000);
+                 } catch (e) { status.innerText = "Mic Error"; }
+             }
+
+             function stopRecording() {
+                 if (!stream || status.innerText === "PROCESSING...") return;
+                 status.innerText = "PROCESSING...";
+                 input.disconnect(); processor.disconnect();
+                 stream.getTracks().forEach(t => t.stop());
+
+                 const merged = mergeBuffers(recBuffer);
+                 const wavBlob = encodeWAV(merged);
+
+                 fetch('/process_audio_full', { method: 'POST', body: wavBlob })
+                     .then(r => r.json())
+                     .then(data => {
+                         transcript.innerText = "YOU SAID: " + data.user_text;
+                         if (data.audio_url) {
+                             const audio = new Audio(data.audio_url);
+                             audio.play();
+                             status.innerText = "REPLYING...";
+                             audio.onended = () => { status.innerText = "IDLE"; };
+                         }
+                     });
+             }
+
+             // Helper functions (mergeBuffers and encodeWAV same as your original)
+             function mergeBuffers(buffer) {
+                 let length = buffer.length * 4096, result = new Float32Array(length), offset = 0;
+                 for (let i = 0; i < buffer.length; i++) { result.set(buffer[i], offset); offset += buffer[i].length; }
+                 return result;
+             }
+             function encodeWAV(samples) {
+                 let buffer = new ArrayBuffer(44 + samples.length * 2), view = new DataView(buffer);
+                 const writeStr = (s, o) => { for (let i = 0; i < s.length; i++) view.setUint8(o + i, s.charCodeAt(i)); };
+                 writeStr('RIFF', 0); view.setUint32(4, 36 + samples.length * 2, true);
+                 writeStr('WAVE', 8); writeStr('fmt ', 12);
+                 view.setUint32(16, 16, true); view.setUint16(20, 1, true);
+                 view.setUint16(22, 1, true); view.setUint32(24, 16000, true);
+                 view.setUint32(28, 32000, true); view.setUint16(32, 2, true);
+                 view.setUint16(34, 16, true); writeStr('data', 36);
+                 view.setUint32(40, samples.length * 2, true);
+                 let index = 44;
+                 for (let i = 0; i < samples.length; i++, index += 2) {
+                     let s = Math.max(-1, Math.min(1, samples[i]));
+                     view.setInt16(index, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
+                 }
+                 return new Blob([view], { type: 'audio/wav' });
+             }
+
+             // POLL SERVER FOR REMOTE TRIGGER
+             setInterval(() => {
+                 if (status.innerText === "IDLE") {
+                     fetch('/check_trigger')
+                         .then(r => r.json())
+                         .then(data => {
+                             if (data.trigger) startRecording();
+                         });
+                 }
+             }, 1000); // Check every second
+         </script>
+     </body>
+     </html>
+     """)