Spaces:
Sleeping
Sleeping
<!doctype html>
<!-- The doctype above keeps browsers in standards mode; without it the page
     is laid out in quirks mode. -->
<html lang="vi">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>F5-TTS Vietnamese - Text-to-Speech</title>
| <style> | |
/* Reset: remove default spacing and size every element by its border box. */
* {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
}

/* Page backdrop: full-height diagonal gradient with a gutter around the card. */
body {
  min-height: 100vh;
  padding: 20px;
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}

/* Centered white card holding the whole UI; overflow is clipped so child
   backgrounds follow the rounded corners. */
.container {
  max-width: 900px;
  margin: 0 auto;
  overflow: hidden;
  border-radius: 20px;
  background: #fff;
  box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
}

/* Title banner at the top of the card (same gradient as the page). */
.header {
  padding: 30px;
  text-align: center;
  color: #fff;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}

.header h1 {
  margin-bottom: 10px;
  font-size: 2.5em;
}

.header p {
  opacity: 0.9;
  font-size: 1.1em;
}

/* Main area below the banner. */
.content {
  padding: 40px;
}

/* Vertical rhythm between form fields. */
.form-group {
  margin-bottom: 25px;
}

/* Field captions sit on their own line above the control. */
label {
  display: block;
  margin-bottom: 10px;
  color: #333;
  font-size: 1.1em;
  font-weight: 600;
}
/* Custom file picker: the native input is moved out of view and the <label>
   that follows it is styled as the visible upload button. */
.file-input-wrapper {
  position: relative;
  overflow: hidden;
  display: inline-block;
  width: 100%;
}

/* Still focusable and submittable, just not visible. */
.file-input-wrapper input[type=file] {
  position: absolute;
  left: -9999px;
}

.file-input-label {
  display: block;
  padding: 15px 20px;
  background: #f8f9fa;
  border: 2px dashed #667eea;
  border-radius: 10px;
  cursor: pointer;
  text-align: center;
  transition: all 0.3s;
}

.file-input-label:hover {
  background: #e7e9fc;
  border-color: #764ba2;
}

/* Keyboard focus lands on the off-screen input, so mirror it onto the visible
   label (its next sibling). The outline is drawn inward because the wrapper
   clips anything outside its box. */
.file-input-wrapper input[type=file]:focus-visible + .file-input-label {
  background: #e7e9fc;
  border-color: #764ba2;
  outline: 3px solid #764ba2;
  outline-offset: -3px;
}

/* Line under the picker where the script prints the chosen file name. */
.file-name {
  margin-top: 10px;
  font-size: 0.9em;
  color: #666;
}
/* Text entry box: full width, user may stretch it vertically only. */
textarea {
  width: 100%;
  min-height: 120px;
  padding: 15px;
  resize: vertical;
  font-family: inherit;
  font-size: 1em;
  border: 2px solid #e0e0e0;
  border-radius: 10px;
  transition: border-color 0.3s;
}

/* Focus is signalled by the border colour instead of the default outline. */
textarea:focus {
  border-color: #667eea;
  outline: none;
}
/* Speed slider: caption row plus a custom-drawn range input. */
.slider-group {
  margin-bottom: 25px;
}

/* Caption on the left, current value on the right. */
.slider-label {
  display: flex;
  justify-content: space-between;
  margin-bottom: 10px;
}

/* Track. Native chrome is switched off so the rules below can draw it;
   the unprefixed property is listed alongside the prefixed one. */
input[type="range"] {
  width: 100%;
  height: 8px;
  border-radius: 5px;
  background: #e0e0e0;
  outline: none;
  -webkit-appearance: none;
  appearance: none;
}

/* The default outline is removed above, so give keyboard users an explicit
   focus ring; otherwise a focused slider looks identical to an unfocused one. */
input[type="range"]:focus-visible {
  outline: 2px solid #764ba2;
  outline-offset: 4px;
}

/* Thumb, WebKit/Blink engines. */
input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 20px;
  height: 20px;
  border-radius: 50%;
  background: #667eea;
  cursor: pointer;
}

/* Thumb, Gecko engine. */
input[type="range"]::-moz-range-thumb {
  width: 20px;
  height: 20px;
  border-radius: 50%;
  background: #667eea;
  cursor: pointer;
}
/* Primary action button: full-width gradient pill. */
.btn {
  width: 100%;
  padding: 15px;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  color: white;
  border: none;
  border-radius: 10px;
  font-size: 1.2em;
  font-weight: 600;
  cursor: pointer;
  transition: transform 0.2s, box-shadow 0.2s;
}

/* Hover: lift slightly and cast a tinted shadow. */
.btn:hover {
  transform: translateY(-2px);
  box-shadow: 0 10px 20px rgba(102, 126, 234, 0.4);
}

/* Disabled: flat grey. This rule has the same specificity as .btn:hover and
   comes later, so it must cancel BOTH hover effects; resetting only the
   transform would leave the hover shadow on a disabled button. */
.btn:disabled {
  background: #ccc;
  cursor: not-allowed;
  transform: none;
  box-shadow: none;
}
/* Busy indicator container; hidden until the script sets display to block. */
.loading {
  display: none;
  margin: 20px 0;
  text-align: center;
}

/* Ring with one coloured quarter, rotated forever by the keyframes below. */
.spinner {
  width: 40px;
  height: 40px;
  margin: 0 auto;
  border: 4px solid #f3f3f3;
  border-top-color: #667eea;
  border-radius: 50%;
  animation: spin 1s linear infinite;
}

/* One full clockwise revolution. */
@keyframes spin {
  from { transform: rotate(0deg); }
  to { transform: rotate(360deg); }
}
/* Output panel; hidden until the script sets display to block. */
.result {
  display: none;
  margin-top: 30px;
  padding: 20px;
  border-radius: 10px;
  background: #f8f9fa;
}

.result h3 {
  margin-bottom: 15px;
  color: #333;
}

/* Player spans the panel width. */
audio {
  width: 100%;
  margin-bottom: 15px;
}

/* Spectrogram image scales to the panel width. */
.spectrogram {
  width: 100%;
  margin-top: 15px;
  border-radius: 10px;
}

/* Error banner; hidden until the script sets display to block. */
.error {
  display: none;
  margin-top: 20px;
  padding: 15px;
  color: #cc0000;
  background: #ffeeee;
  border-left: 4px solid #ff4444;
  border-radius: 5px;
}
/* Amber call-out listing the model's limitations. */
.info-box {
  margin-top: 30px;
  padding: 15px;
  background: #fff3cd;
  border-left: 4px solid #ffc107;
  border-radius: 5px;
}

.info-box h4 {
  margin-bottom: 10px;
  color: #856404;
}

/* Indent so the bullets are not flush with the panel edge (the global reset
   removes the list's default padding). */
.info-box ul {
  margin-left: 20px;
}

.info-box li {
  margin-bottom: 5px;
  color: #856404;
}

/* API reference panel. */
.api-docs {
  margin-top: 30px;
  padding: 20px;
  background: #f8f9fa;
  border-radius: 10px;
}

.api-docs h3 {
  margin-bottom: 15px;
  color: #333;
}

/* Dark code samples; long lines scroll sideways instead of wrapping. */
.api-docs pre {
  padding: 15px;
  overflow-x: auto;
  font-size: 0.9em;
  color: #f8f8f2;
  background: #2d2d2d;
  border-radius: 5px;
}

.api-docs code {
  font-family: 'Courier New', monospace;
}
| </style> | |
| </head> | |
| <body> | |
| <div class="container"> | |
<!-- Page banner. A <header> element exposes it as a landmark; the class is
     kept so the existing .header styles still apply. -->
<header class="header">
  <h1>🎤 F5-TTS Vietnamese</h1>
  <p>Text-to-Speech Synthesis • Trained on ~1000 hours of data</p>
</header>
| <div class="content"> | |
<!-- Synthesis form: reference voice clip, text to speak, playback speed.
     The page script intercepts submit and posts the fields with fetch, so the
     name attributes below are the field names the server receives. -->
<form id="ttsForm">
  <div class="form-group">
    <!-- Both labels point at the file input so either one opens the picker. -->
    <label for="refAudio">🔊 Sample Voice (Audio Reference)</label>
    <div class="file-input-wrapper">
      <input type="file" id="refAudio" name="ref_audio" accept="audio/*" required>
      <label for="refAudio" class="file-input-label">
        📁 Click to upload audio file
      </label>
    </div>
    <!-- Filled by the script with the chosen file's name. -->
    <div class="file-name" id="fileName"></div>
  </div>
  <div class="form-group">
    <label for="genText">📝 Text to Synthesize</label>
    <textarea id="genText" name="gen_text" placeholder="Nhập văn bản tiếng Việt để tạo giọng nói..." required></textarea>
  </div>
  <div class="slider-group">
    <div class="slider-label">
      <!-- for="speed" gives the slider its accessible name. -->
      <label for="speed">⚡ Speed</label>
      <span id="speedValue">1.0x</span>
    </div>
    <input type="range" id="speed" name="speed" min="0.3" max="2.0" step="0.1" value="1.0">
  </div>
  <button type="submit" class="btn" id="submitBtn">
    🔥 Generate Speech
  </button>
</form>
<!-- Busy indicator, shown by the script while a request is in flight.
     role="status" marks it as a polite live region for assistive technology. -->
<div class="loading" id="loading" role="status">
  <div class="spinner"></div>
  <p style="margin-top: 15px; color: #666;">Generating speech... Please wait...</p>
</div>
<!-- Error banner; the script writes the message here and reveals it.
     role="alert" asks assistive technology to announce it when it appears. -->
<div class="error" id="error" role="alert"></div>
<!-- Output panel; the script sets the audio and image sources, then reveals it. -->
<div class="result" id="result">
  <h3>🎧 Generated Audio</h3>
  <audio id="audioPlayer" controls></audio>
  <h3>📊 Spectrogram</h3>
  <img id="spectrogram" class="spectrogram" alt="Spectrogram">
</div>
<!-- Static call-out: known limitations of the model. -->
<div class="info-box">
  <h4>❗ Model Limitations</h4>
  <ul>
    <li>May not perform well with numbers, dates, and special characters</li>
    <li>Rhythm may be inconsistent with some texts</li>
    <li>Works best with clear, well-pronounced reference audio</li>
    <li>Maximum 1000 words per request</li>
  </ul>
</div>
<!-- API reference. Text inside <pre> is rendered verbatim, so the sample lines
     are deliberately NOT indented to match the surrounding markup. -->
<div class="api-docs">
  <h3>📡 API Documentation</h3>
  <p style="margin-bottom: 15px;">Use the following endpoint to integrate with your application:</p>
  <h4>POST /api/synthesize</h4>
  <!-- Field names mirror the form controls (ref_audio, gen_text, speed).
       curl uploads a file when the -F value starts with "@"; reference.wav is
       a placeholder file name. -->
  <pre><code>curl -X POST http://localhost:5000/api/synthesize \
  -F "ref_audio=@reference.wav" \
  -F "gen_text=Xin chào, đây là giọng nói tổng hợp" \
  -F "speed=1.0"</code></pre>
  <h4 style="margin-top: 20px;">Response:</h4>
  <pre><code>{
  "success": true,
  "audio": "base64_encoded_audio_data",
  "spectrogram": "base64_encoded_image_data",
  "sample_rate": 24000,
  "message": "Speech synthesized successfully"
}</code></pre>
  <h4 style="margin-top: 20px;">GET /api/health</h4>
  <p style="margin-bottom: 10px;">Check if the service is running:</p>
  <pre><code>curl http://localhost:5000/api/health</code></pre>
  <h4 style="margin-top: 20px;">GET /api/info</h4>
  <p style="margin-bottom: 10px;">Get model information:</p>
  <pre><code>curl http://localhost:5000/api/info</code></pre>
</div>
| </div> | |
| </div> | |
| <script> | |
// Handles to the elements this script reads or updates, looked up by id.
// Inputs:
const form = document.querySelector('#ttsForm');
const refAudioInput = document.querySelector('#refAudio');
const speedSlider = document.querySelector('#speed');
const submitBtn = document.querySelector('#submitBtn');
// Read-outs next to the inputs:
const fileNameDiv = document.querySelector('#fileName');
const speedValue = document.querySelector('#speedValue');
// Request state and output areas:
const loading = document.querySelector('#loading');
const error = document.querySelector('#error');
const result = document.querySelector('#result');
const audioPlayer = document.querySelector('#audioPlayer');
const spectrogram = document.querySelector('#spectrogram');
// Keep the file-name line in sync with the picker.
// When the selection ends up empty (some browsers clear it if the dialog is
// cancelled), the line is blanked so it never shows a file that will not be sent.
refAudioInput.addEventListener('change', function(e) {
  const files = e.target.files;
  fileNameDiv.textContent = files.length > 0 ? '✅ ' + files[0].name : '';
});
// Mirror the slider position into the read-out next to its label.
// The value is formatted with one decimal so whole numbers read "1.0x" like the
// initial markup, rather than "1x" when the input reports its value as "1".
speedSlider.addEventListener('input', function(e) {
  speedValue.textContent = Number(e.target.value).toFixed(1) + 'x';
});
// Handle form submission: POST the form fields to /api/synthesize, then show
// either the generated audio and spectrogram or an error message. The submit
// button is disabled and the busy indicator shown for the duration.
form.addEventListener('submit', async function(e) {
  // Stay on the page; the request is sent with fetch below.
  e.preventDefault();

  // Hide previous results and errors
  result.style.display = 'none';
  error.style.display = 'none';

  // Show loading
  loading.style.display = 'block';
  submitBtn.disabled = true;

  try {
    const response = await fetch('/api/synthesize', {
      method: 'POST',
      body: new FormData(form)
    });

    // A failing server or proxy may answer with something that is not JSON;
    // report that plainly instead of surfacing a raw JSON parse error.
    let data;
    try {
      data = await response.json();
    } catch (parseErr) {
      throw new Error('Server returned an invalid response (HTTP ' + response.status + ')');
    }

    if (!response.ok || !data || !data.success) {
      throw new Error((data && data.error) || 'Unknown error occurred');
    }

    // Display audio. Each run creates a new blob URL; revoke the one from the
    // previous run so its audio data can be released.
    const previousUrl = audioPlayer.src;
    const audioBlob = base64ToBlob(data.audio, 'audio/wav');
    audioPlayer.src = URL.createObjectURL(audioBlob);
    if (previousUrl.startsWith('blob:')) {
      URL.revokeObjectURL(previousUrl);
    }

    // Display spectrogram
    spectrogram.src = 'data:image/png;base64,' + data.spectrogram;
    result.style.display = 'block';
  } catch (err) {
    // Network failures, bad responses and server-reported errors all land here.
    error.textContent = '❌ ' + err.message;
    error.style.display = 'block';
  } finally {
    // Always restore the idle state, whether the request succeeded or not.
    loading.style.display = 'none';
    submitBtn.disabled = false;
  }
});
/**
 * Convert a base64 string into a Blob.
 *
 * The decoded bytes are written straight into a Uint8Array, avoiding an
 * intermediate Array of numbers that is much larger than the payload itself.
 *
 * @param {string} base64 - Base64-encoded data (no "data:" prefix).
 * @param {string} mimeType - MIME type to set on the resulting Blob.
 * @returns {Blob} Blob containing the decoded bytes.
 * @throws {DOMException} If atob rejects the input as invalid base64.
 */
function base64ToBlob(base64, mimeType) {
  // atob yields a "binary string": one character per byte, code 0-255.
  const binary = atob(base64);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return new Blob([bytes], { type: mimeType });
}
| </script> | |
| </body> | |
| </html> | |