HeWhoComes committed on
Commit
c8d46a9
·
verified ·
1 Parent(s): b864501

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +495 -0
app.py ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ZW Kitten - Working FastAPI version with real KittenTTS
4
+ """
5
+ import os
6
+ import tempfile
7
+ from fastapi import FastAPI, Request
8
+ from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
9
+ from fastapi.staticfiles import StaticFiles
10
+ import uvicorn
11
+
12
# Try to import KittenTTS; fall back to demo mode when the package is missing.
try:
    from kittentts import KittenTTS
    KITTEN_AVAILABLE = True
    print("✅ KittenTTS loaded successfully")
except ImportError:
    KITTEN_AVAILABLE = False
    print("⚠️ KittenTTS not available, using demo mode")

app = FastAPI(title="ZW Kitten TTS")

# Shared synthesizer instance. It is always bound (None in demo mode) so that
# code referencing `tts` never hits a NameError when initialization failed.
tts = None
if KITTEN_AVAILABLE:
    try:
        tts = KittenTTS()
        print("✅ KittenTTS initialized")
    except Exception as e:
        # Startup boundary: any constructor failure downgrades the service to
        # demo mode instead of preventing the app from starting.
        KITTEN_AVAILABLE = False
        print(f"⚠️ KittenTTS failed to initialize: {e}")
31
+
32
@app.post("/generate")
async def generate_speech(request: Request):
    """Generate speech from a ZW protocol request.

    The request body must be a JSON object with:
      * ``text``      - the line to speak (required, non-blank string)
      * ``character`` - optional character name, defaults to ``claude``
      * ``emotion``   - optional emotion name, defaults to ``neutral``

    Returns a JSON response containing ``zw_block`` and ``info`` plus either
    ``audio_url`` (when KittenTTS is available) or an ``error`` (demo mode).
    Responds with status 400 for malformed input and 500 when synthesis or
    writing the audio file fails.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both subclass ValueError.
        return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
    if not isinstance(data, dict):
        return JSONResponse({"error": "JSON body must be an object"}, status_code=400)

    text = data.get('text') or ''
    character = data.get('character', 'claude')
    emotion = data.get('emotion', 'neutral')

    # The string methods used below would raise on numbers, lists or null.
    if not all(isinstance(value, str) for value in (text, character, emotion)):
        return JSONResponse(
            {"error": "text, character and emotion must be strings"},
            status_code=400,
        )

    text = text.strip()
    if not text:
        return JSONResponse({"error": "No text provided"}, status_code=400)

    # Character to voice mapping
    voice_map = {
        'claude': 'expr-voice-2-m',
        'keen': 'expr-voice-2-f',
        'tran': 'expr-voice-3-m',
        'isla': 'expr-voice-4-f',
        'system': 'expr-voice-5-m',
        'narrator': 'expr-voice-5-f',
    }

    # Emotion to speed mapping
    speed_map = {
        'neutral': 1.0,
        'cosmic_awareness': 0.8,
        'determined': 1.1,
        'whisper': 0.9,
        'urgent': 1.2,
        'calm': 0.9,
    }

    # Unknown characters/emotions fall back to the default voice and speed.
    voice = voice_map.get(character, 'expr-voice-2-m')
    speed = speed_map.get(emotion, 1.0)

    # Both the real and the demo response share these two text payloads.
    zw_block = f"""!zw/dialogue.intent:
character: {character}
emotion: {emotion}
line: {text}"""
    info_header = f"""🎭 Character: {character.title()}
😊 Emotion: {emotion.replace('_', ' ').title()} (speed: {speed:.1f}x)
🎵 Voice: {voice}
📝 Text: "{text}"

"""

    if not KITTEN_AVAILABLE:
        # Demo mode - no actual audio
        return JSONResponse({
            "success": False,
            "error": "KittenTTS not available",
            "zw_block": zw_block,
            "info": info_header + "⚠️ Demo mode - install KittenTTS for real audio generation",
        })

    try:
        # Generate real audio with KittenTTS
        audio = tts.generate(text, voice=voice, speed=speed)

        # Reserve a unique path, then close the handle before soundfile
        # writes to it so the descriptor is not leaked on every request.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            wav_path = temp_file.name

        import soundfile as sf
        sf.write(wav_path, audio, 24000)
    except Exception as e:
        # Request boundary: report any synthesis/IO failure to the client.
        return JSONResponse({"error": str(e)}, status_code=500)

    return JSONResponse({
        "success": True,
        "audio_url": f"/audio/{os.path.basename(wav_path)}",
        "zw_block": zw_block,
        "info": info_header + "✅ Real KittenTTS audio generated!",
    })
110
+
111
@app.get("/audio/{filename}")
async def serve_audio(filename: str):
    """Serve a generated audio file from the system temp directory.

    Only bare ``.wav`` file names are accepted; anything containing a path
    component, or with a different extension, is answered with 404 so the
    endpoint cannot be used to read other files that live in the temp
    directory.
    """
    not_found = JSONResponse({"error": "File not found"}, status_code=404)

    # Reject names with directory parts and restrict to the extension that
    # the /generate endpoint produces.
    if os.path.basename(filename) != filename or not filename.endswith(".wav"):
        return not_found

    file_path = os.path.join(tempfile.gettempdir(), filename)
    # isfile (not exists): a directory at that path must not reach FileResponse.
    if os.path.isfile(file_path):
        return FileResponse(file_path, media_type="audio/wav")
    return not_found
118
+
119
@app.get("/", response_class=HTMLResponse)
def serve_interface():
    """Return the single-page HTML interface for the TTS demo.

    The page lets the user enter text, pick a character and an emotion, and
    POSTs them as JSON to ``/generate``; the response's ``audio_url``,
    ``zw_block`` and ``info``/``error`` fields are rendered into the page.
    """
    return """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>🐱 ZW Kitten - Working TTS</title>
    <style>
        body {
            font-family: 'Segoe UI', system-ui, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            color: #333;
        }

        .container {
            background: white;
            border-radius: 20px;
            padding: 40px;
            box-shadow: 0 20px 40px rgba(0,0,0,0.1);
        }

        .header {
            text-align: center;
            margin-bottom: 40px;
        }

        .header h1 {
            font-size: 3em;
            margin: 0;
            background: linear-gradient(45deg, #ff6b6b, #4ecdc4);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }

        .subtitle {
            font-size: 1.2em;
            color: #666;
            margin: 10px 0;
        }

        .input-group {
            margin: 30px 0;
        }

        label {
            display: block;
            font-weight: bold;
            margin-bottom: 10px;
            color: #444;
        }

        textarea {
            width: 100%;
            padding: 15px;
            border: 2px solid #e0e0e0;
            border-radius: 10px;
            font-size: 16px;
            transition: border-color 0.3s;
            box-sizing: border-box;
        }

        textarea:focus {
            outline: none;
            border-color: #4ecdc4;
        }

        .character-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px;
            margin: 20px 0;
        }

        .character-card {
            padding: 15px;
            border: 2px solid #e0e0e0;
            border-radius: 10px;
            cursor: pointer;
            transition: all 0.3s;
            text-align: center;
        }

        .character-card.selected {
            border-color: #4ecdc4;
            background: #e3f2fd;
        }

        .character-card:hover {
            border-color: #4ecdc4;
            transform: translateY(-2px);
        }

        .emotion-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
            gap: 10px;
            margin: 20px 0;
        }

        .emotion-card {
            padding: 10px;
            border: 2px solid #e0e0e0;
            border-radius: 8px;
            cursor: pointer;
            transition: all 0.3s;
            text-align: center;
            font-size: 14px;
        }

        .emotion-card.selected {
            border-color: #ff6b6b;
            background: #fff3e0;
        }

        .emotion-card:hover {
            border-color: #ff6b6b;
        }

        .generate-btn {
            width: 100%;
            padding: 20px;
            background: linear-gradient(45deg, #4ecdc4, #44a08d);
            color: white;
            border: none;
            border-radius: 15px;
            font-size: 18px;
            font-weight: bold;
            cursor: pointer;
            transition: transform 0.3s;
        }

        .generate-btn:hover {
            transform: translateY(-3px);
        }

        .generate-btn:disabled {
            opacity: 0.6;
            cursor: not-allowed;
            transform: none;
        }

        .output-section {
            margin-top: 40px;
            padding: 30px;
            background: #f8f9fa;
            border-radius: 15px;
        }

        .audio-player {
            width: 100%;
            margin: 20px 0;
        }

        .zw-format {
            background: #2d3748;
            color: #e2e8f0;
            padding: 20px;
            border-radius: 10px;
            font-family: 'Courier New', monospace;
            margin: 20px 0;
            overflow-x: auto;
        }

        .info-box {
            background: #e8f5e8;
            padding: 20px;
            border-radius: 10px;
            margin: 20px 0;
            border-left: 4px solid #4caf50;
            white-space: pre-line;
        }

        .error-box {
            background: #ffeaea;
            padding: 20px;
            border-radius: 10px;
            margin: 20px 0;
            border-left: 4px solid #f44336;
            white-space: pre-line;
        }

        .loading {
            text-align: center;
            color: #666;
            margin: 20px 0;
        }

        .repo-link {
            text-align: center;
            margin-top: 30px;
        }

        .repo-link a {
            color: #4ecdc4;
            text-decoration: none;
            font-weight: bold;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>🐱 ZW Kitten</h1>
            <div class="subtitle">ZiegelWagga Protocol Text-to-Speech</div>
            <div class="subtitle">Working voice synthesis with character personalities</div>
        </div>

        <div class="input-group">
            <label for="text-input">💬 Text to Speak</label>
            <textarea id="text-input" rows="3" placeholder="Enter your message here...">The ZW protocol awakens across all dimensions</textarea>
        </div>

        <div class="input-group">
            <label>🎭 Character</label>
            <div class="character-grid" id="character-grid">
                <div class="character-card selected" data-character="claude">
                    <div><strong>Claude</strong></div>
                    <div>Analytical Male</div>
                </div>
                <div class="character-card" data-character="keen">
                    <div><strong>Keen</strong></div>
                    <div>Clear Female</div>
                </div>
                <div class="character-card" data-character="tran">
                    <div><strong>Tran</strong></div>
                    <div>Deep Male</div>
                </div>
                <div class="character-card" data-character="isla">
                    <div><strong>Isla</strong></div>
                    <div>Tactical Female</div>
                </div>
                <div class="character-card" data-character="system">
                    <div><strong>System</strong></div>
                    <div>Default Male</div>
                </div>
                <div class="character-card" data-character="narrator">
                    <div><strong>Narrator</strong></div>
                    <div>Default Female</div>
                </div>
            </div>
        </div>

        <div class="input-group">
            <label>😊 Emotion</label>
            <div class="emotion-grid" id="emotion-grid">
                <div class="emotion-card selected" data-emotion="neutral">Neutral</div>
                <div class="emotion-card" data-emotion="cosmic_awareness">Cosmic Awareness</div>
                <div class="emotion-card" data-emotion="determined">Determined</div>
                <div class="emotion-card" data-emotion="whisper">Whisper</div>
                <div class="emotion-card" data-emotion="urgent">Urgent</div>
                <div class="emotion-card" data-emotion="calm">Calm</div>
            </div>
        </div>

        <button class="generate-btn" onclick="generateSpeech()" id="generate-btn">🎙️ Generate Speech</button>

        <div class="output-section" id="output-section" style="display: none;">
            <h3>🔊 Generated Audio</h3>
            <audio id="audio-player" class="audio-player" controls style="display: none;">
                Your browser does not support the audio element.
            </audio>

            <h3>📝 ZW Protocol</h3>
            <div class="zw-format" id="zw-output"></div>

            <div id="info-output" class="info-box"></div>
        </div>

        <div class="repo-link">
            <a href="https://github.com/SmokesBowls/zw-kitten-tts" target="_blank">
                📚 Full Repository: GitHub - zw-kitten-tts
            </a>
        </div>
    </div>

    <script>
        let selectedCharacter = 'claude';
        let selectedEmotion = 'neutral';

        // Character selection
        document.getElementById('character-grid').addEventListener('click', function(e) {
            const card = e.target.closest('.character-card');
            if (card) {
                document.querySelectorAll('.character-card').forEach(c => c.classList.remove('selected'));
                card.classList.add('selected');
                selectedCharacter = card.dataset.character;
            }
        });

        // Emotion selection
        document.getElementById('emotion-grid').addEventListener('click', function(e) {
            const card = e.target.closest('.emotion-card');
            if (card) {
                document.querySelectorAll('.emotion-card').forEach(c => c.classList.remove('selected'));
                card.classList.add('selected');
                selectedEmotion = card.dataset.emotion;
            }
        });

        async function generateSpeech() {
            const text = document.getElementById('text-input').value.trim();
            const btn = document.getElementById('generate-btn');
            const outputSection = document.getElementById('output-section');
            const audioPlayer = document.getElementById('audio-player');
            const zwOutput = document.getElementById('zw-output');
            const infoOutput = document.getElementById('info-output');

            if (!text) {
                alert('Please enter some text to speak');
                return;
            }

            // Show loading
            btn.textContent = '🔄 Generating...';
            btn.disabled = true;
            outputSection.style.display = 'block';
            infoOutput.className = 'loading';
            infoOutput.textContent = 'Generating speech with KittenTTS...';

            try {
                const response = await fetch('/generate', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        text: text,
                        character: selectedCharacter,
                        emotion: selectedEmotion
                    })
                });

                const result = await response.json();

                if (result.success) {
                    // Show audio player
                    audioPlayer.src = result.audio_url;
                    audioPlayer.style.display = 'block';
                    infoOutput.className = 'info-box';
                } else {
                    audioPlayer.style.display = 'none';
                    infoOutput.className = 'error-box';
                }

                // Show ZW protocol and info
                zwOutput.textContent = result.zw_block;
                infoOutput.textContent = result.info || result.error;

            } catch (error) {
                infoOutput.className = 'error-box';
                infoOutput.textContent = `Error: ${error.message}`;
                audioPlayer.style.display = 'none';
            }

            // Reset button
            btn.textContent = '🎙️ Generate Speech';
            btn.disabled = false;
        }
    </script>
</body>
</html>
"""
488
+
489
@app.get("/health")
def health_check():
    """Report that the service is up and whether KittenTTS is usable."""
    payload = {"status": "healthy"}
    # Mirrors the module-level flag set during import/initialization.
    payload["kitten_available"] = KITTEN_AVAILABLE
    return payload
492
+
493
if __name__ == "__main__":
    # Listen on the port named by the PORT environment variable, or 7860
    # when it is unset, and bind to all interfaces.
    listen_port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)