HexaGrim committed
Commit 85e4066 · verified · Parent: a2f0c46

Update app.py

Files changed (1):
  1. app.py +118 -53
app.py CHANGED
@@ -1,81 +1,100 @@
-# ... (keep existing imports)
-
-# New global state
-trigger_ready = False
+from flask import Flask, request, jsonify, send_file, render_template_string
+import os, io, uuid, requests, json, logging, torch
+import numpy as np
+from transformers import pipeline
+from gtts import gTTS
+from scipy.io import wavfile
+
+# Setup
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = Flask(__name__)
+OPENROUTER_API_KEY = "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"
+MODEL_ID = "meta-llama/llama-3.1-405b-instruct:free"
+
+# Storage & State
+latest_ai_text = "No interaction yet."
 latest_user_text = ""
+trigger_ready = False  # Flag for remote POST command
+
+# Load Whisper STT
+device = "cuda" if torch.cuda.is_available() else "cpu"
+stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device=device)
+
+def ask_llama(text):
+    global latest_ai_text
+    if not text.strip(): return "..."
+    try:
+        headers = {
+            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+            "HTTP-Referer": "https://huggingface.co/spaces",
+            "Content-Type": "application/json"
+        }
+        payload = {
+            "model": MODEL_ID,
+            "messages": [
+                {"role": "system", "content": "You are SIR(study intelligence robot). Max 10 words. Made by Aayush Parajuli and His team. only reply in text and numbers strictly forbid using any symbols or emojis"},
+                {"role": "user", "content": text}
+            ]
+        }
+        r = requests.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, data=json.dumps(payload), timeout=15)
+        ans = r.json()['choices'][0]['message']['content'].strip()
+        latest_ai_text = ans
+        return ans
+    except Exception as e:
+        logger.error(f"OpenRouter Error: {e}")
+        return "AI response unavailable."
 
-# ... (keep ask_llama, get_audio, latest functions)
+# --- REMOTE TRIGGER ENDPOINTS ---
 
 @app.route('/trigger_record', methods=['POST'])
 def trigger_record():
+    """External devices (ESP32/Postman) call this to start the web recording."""
     global trigger_ready
     trigger_ready = True
-    return jsonify({"status": "signal_sent"})
+    return jsonify({"status": "signal_sent", "message": "Recording will start on homepage"})
 
 @app.route('/check_trigger')
 def check_trigger():
+    """Homepage polls this to see if it should start recording."""
     global trigger_ready
     if trigger_ready:
-        trigger_ready = False # Reset after pick-up
+        trigger_ready = False
         return jsonify({"trigger": True})
     return jsonify({"trigger": False})
 
-@app.route('/process_audio_full', methods=['POST'])
-def process_audio():
-    global latest_user_text # Track what we said
-    try:
-        audio_file = io.BytesIO(request.data)
-        samplerate, data = wavfile.read(audio_file)
-        if data.dtype != np.float32:
-            data = data.astype(np.float32) / 32768.0
-
-        stt_result = stt_pipeline({"sampling_rate": samplerate, "raw": data})
-        user_text = stt_result.get('text', '').strip() or "..."
-        latest_user_text = user_text # Update global
-
-        ai_reply = ask_llama(user_text)
-
-        file_id = str(uuid.uuid4())
-        path = f"/tmp/{file_id}.mp3"
-        gTTS(text=ai_reply, lang='en').save(path)
-
-        return jsonify({
-            "audio_url": f"/get_audio/{file_id}",
-            "user_text": user_text,
-            "ai_text": ai_reply
-        })
-    except Exception as e:
-        logger.error(e)
-        return jsonify({"error": "Failed"}), 500
+# --- CORE LOGIC ---
 
-# Homepage
 @app.route('/')
 def index():
     return render_template_string("""
    <!DOCTYPE html>
    <html>
    <head>
-       <title>SIR Voice Assistant</title>
+       <title>Voice AI Assistant</title>
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <style>
-           body { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 100vh; background: #0a0a0a; color: white; font-family: sans-serif; }
-           #btn { width: 150px; height: 150px; border-radius: 50%; border: none; background: #ff3b30; color: white; font-weight: bold; }
-           #transcript { margin-top: 30px; font-size: 1.5rem; color: #00ffcc; text-align: center; padding: 20px; }
-           #status { color: #ffcc00; margin-bottom: 10px; }
+           body { display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 100vh; background: #0a0a0a; color: white; font-family: 'Segoe UI', sans-serif; margin: 0; padding: 0; }
+           #btn { width: 180px; height: 180px; border-radius: 50%; border: 6px solid #333; background: #ff3b30; color: white; font-size: 20px; font-weight: bold; cursor: pointer; transition: 0.3s; box-shadow: 0 0 30px rgba(255, 59, 48, 0.3); z-index: 10; }
+           #status { margin-top: 20px; font-family: monospace; color: #ffcc00; font-size: 1.2rem; }
+           #transcript { margin-top: 20px; font-size: 1.4rem; color: #00ffcc; text-align: center; max-width: 80%; }
        </style>
    </head>
    <body>
-       <div id="status">IDLE</div>
        <button id="btn">SIR ACTIVE</button>
-       <div id="transcript">Waiting for voice...</div>
+       <div id="status">READY / WAITING FOR TRIGGER</div>
+       <div id="transcript"></div>
 
        <script>
        let audioContext, processor, input, stream, recBuffer = [];
+       const btn = document.getElementById('btn');
       const status = document.getElementById('status');
       const transcript = document.getElementById('transcript');
 
       async function startRecording() {
           recBuffer = [];
+          transcript.innerText = "";
          try {
              stream = await navigator.mediaDevices.getUserMedia({ audio: true });
              audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
@@ -85,16 +104,19 @@ def index():
              input.connect(processor);
              processor.connect(audioContext.destination);
 
-             status.innerText = "LISTENING (4s)...";
+             status.innerText = "LISTENING (4 SECONDS)...";
+             btn.style.background = "#34c759"; // Green while recording
 
-             // Auto-stop after 4 seconds
+             // Automatically stop after 4 seconds
             setTimeout(stopRecording, 4000);
-         } catch (e) { status.innerText = "Mic Error"; }
+         } catch (e) { alert("Microphone access denied."); }
      }
 
      function stopRecording() {
          if (!stream || status.innerText === "PROCESSING...") return;
          status.innerText = "PROCESSING...";
+         btn.style.background = "#ff3b30";
+
          input.disconnect(); processor.disconnect();
          stream.getTracks().forEach(t => t.stop());
 
@@ -104,22 +126,24 @@ def index():
          fetch('/process_audio_full', { method: 'POST', body: wavBlob })
              .then(r => r.json())
              .then(data => {
+                 // SHOW WHAT WE SAID
                 transcript.innerText = "YOU SAID: " + data.user_text;
+
                 if(data.audio_url) {
                     const audio = new Audio(data.audio_url);
                     audio.play();
-                    status.innerText = "REPLYING...";
-                    audio.onended = () => { status.innerText = "IDLE"; };
+                    status.innerText = "SIR REPLYING...";
+                    audio.onended = () => { status.innerText = "READY / WAITING FOR TRIGGER"; };
                 }
-             });
+             }).catch(() => { status.innerText = "ERROR"; });
      }
 
-     // Helper functions (mergeBuffers and encodeWAV same as your original)
      function mergeBuffers(buffer) {
         let length = buffer.length * 4096, result = new Float32Array(length), offset = 0;
         for (let i = 0; i < buffer.length; i++) { result.set(buffer[i], offset); offset += buffer[i].length; }
         return result;
     }
+
    function encodeWAV(samples) {
        let buffer = new ArrayBuffer(44 + samples.length * 2), view = new DataView(buffer);
        const writeStr = (s, o) => { for (let i=0; i<s.length; i++) view.setUint8(o+i, s.charCodeAt(i)); };
@@ -138,18 +162,59 @@ def index():
        return new Blob([view], { type: 'audio/wav' });
    }
 
-   // POLL SERVER FOR REMOTE TRIGGER
+   // POLL FOR EXTERNAL TRIGGER
    setInterval(() => {
-       if (status.innerText === "IDLE") {
+       if (status.innerText.includes("READY")) {
           fetch('/check_trigger')
              .then(r => r.json())
              .then(data => {
                 if (data.trigger) startRecording();
             });
       }
-   }, 1000); // Check every second
+   }, 1000);
 
+   // Initialize audio on first click (Browser requirement)
+   btn.onclick = () => { status.innerText = "SIR READY & ACTIVE"; };
    </script>
    </body>
    </html>
-    """)
+    """)
+
+@app.route('/process_audio_full', methods=['POST'])
+def process_audio():
+    global latest_user_text
+    try:
+        audio_file = io.BytesIO(request.data)
+        samplerate, data = wavfile.read(audio_file)
+        if data.dtype != np.float32:
+            data = data.astype(np.float32) / 32768.0
+
+        stt_result = stt_pipeline({"sampling_rate": samplerate, "raw": data})
+        user_text = stt_result.get('text', '').strip() or "..."
+        latest_user_text = user_text  # Store what we said
+
+        ai_reply = ask_llama(user_text)
+
+        file_id = str(uuid.uuid4())
+        path = f"/tmp/{file_id}.mp3"
+        gTTS(text=ai_reply, lang='en').save(path)
+
+        return jsonify({
+            "audio_url": f"/get_audio/{file_id}",
+            "user_text": user_text,
+            "ai_text": ai_reply
+        })
+    except Exception as e:
+        logger.error(e)
+        return jsonify({"error": "Failed to process audio"}), 500
+
+@app.route('/get_audio/<fid>')
+def get_audio(fid):
+    return send_file(f"/tmp/{fid}.mp3")
+
+@app.route('/latest_text')
+def latest():
+    return latest_ai_text
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860)
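
For reference, the remote-trigger flow this commit adds works as a polling handshake: an external device (the docstring mentions ESP32/Postman) POSTs to /trigger_record, the open homepage polls /check_trigger once per second and starts its 4-second recording when the flag comes back true, and the latest reply can be read from /latest_text. A minimal client sketch in Python; the Space URL below is a hypothetical placeholder, not part of the commit:

import time
import requests

BASE = "https://your-space.hf.space"  # hypothetical URL; substitute your own Space

# Set the server-side flag; the homepage picks it up within ~1 second.
r = requests.post(f"{BASE}/trigger_record", timeout=10)
print(r.json())  # {"status": "signal_sent", "message": "Recording will start on homepage"}

# Allow time for the 4 s recording plus STT, LLM, and TTS, then read
# the most recent AI reply stored in latest_ai_text.
time.sleep(10)
print(requests.get(f"{BASE}/latest_text", timeout=10).text)

Note that /check_trigger clears the flag on first read, so only one open homepage tab will pick up each trigger; a second poller would see {"trigger": false}.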