<!-- Uploaded by manhteky123 ("Upload 5 files", commit 258b448, verified) -->
<!DOCTYPE html>
<html lang="vi">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>F5-TTS Vietnamese - Text-to-Speech</title>
<style>
/* Global reset: border-box sizing and no default margins or padding. */
* {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
}

/* Page backdrop: a full-height diagonal gradient surrounding the card. */
body {
  min-height: 100vh;
  padding: 20px;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

/* Centered white card that holds the whole UI. Overflow is clipped so the
   children stay inside the rounded corners. */
.container {
  max-width: 900px;
  margin: 0 auto;
  overflow: hidden;
  border-radius: 20px;
  background: #fff;
  box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
}

/* Banner at the top of the card; reuses the page gradient. */
.header {
  padding: 30px;
  text-align: center;
  color: #fff;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
}

.header h1 {
  margin-bottom: 10px;
  font-size: 2.5em;
}

.header p {
  opacity: 0.9;
  font-size: 1.1em;
}

/* Padded body of the card below the banner. */
.content {
  padding: 40px;
}
/* Vertical spacing between form rows. */
.form-group {
  margin-bottom: 25px;
}

/* Field captions. This is an element selector, so it also applies to the
   styled upload label below, which overrides what it needs. */
label {
  display: block;
  font-weight: 600;
  margin-bottom: 10px;
  color: #333;
  font-size: 1.1em;
}

/* Positioning context for the visually hidden native file input. */
.file-input-wrapper {
  position: relative;
  overflow: hidden;
  display: inline-block;
  width: 100%;
}

/* Visually hide the native file input while keeping it focusable and
   physically inside the wrapper. Parking it far off-screen (left: -9999px)
   would anchor the browser's "required" validation bubble off-screen as well.
   pointer-events is disabled so clicks always land on the styled label. */
.file-input-wrapper input[type=file] {
  position: absolute;
  left: 50%;
  bottom: 0;
  width: 1px;
  height: 1px;
  opacity: 0;
  pointer-events: none;
}

/* The dashed drop-zone style label that stands in for the file input. */
.file-input-label {
  display: block;
  padding: 15px 20px;
  background: #f8f9fa;
  border: 2px dashed #667eea;
  border-radius: 10px;
  cursor: pointer;
  text-align: center;
  transition: all 0.3s;
}

.file-input-label:hover {
  background: #e7e9fc;
  border-color: #764ba2;
}

/* Keyboard focus sits on the hidden input, so draw the focus ring on the
   label that immediately follows it. The negative offset keeps the ring
   inside the wrapper, which clips overflow. */
.file-input-wrapper input[type=file]:focus-visible + .file-input-label {
  background: #e7e9fc;
  outline: 3px solid #764ba2;
  outline-offset: -3px;
}

/* Line under the upload control that shows the chosen file's name. */
.file-name {
  margin-top: 10px;
  font-size: 0.9em;
  color: #666;
}

textarea {
  width: 100%;
  padding: 15px;
  border: 2px solid #e0e0e0;
  border-radius: 10px;
  font-size: 1em;
  resize: vertical;
  min-height: 120px;
  font-family: inherit;
  transition: border-color 0.3s;
}

/* The default outline is replaced by an accent-colored border on focus. */
textarea:focus {
  outline: none;
  border-color: #667eea;
}
/* Speed slider row. */
.slider-group {
  margin-bottom: 25px;
}

/* Caption on the left, current value on the right. */
.slider-label {
  display: flex;
  justify-content: space-between;
  margin-bottom: 10px;
}

/* Custom track. Native rendering is switched off with both the prefixed and
   the standard property so the track styling applies consistently. */
input[type="range"] {
  width: 100%;
  height: 8px;
  border-radius: 5px;
  background: #e0e0e0;
  outline: none;
  -webkit-appearance: none;
  appearance: none;
}

/* The default outline is removed above, so give keyboard users an explicit,
   clearly visible focus indicator. */
input[type="range"]:focus-visible {
  outline: 2px solid #764ba2;
  outline-offset: 4px;
}

/* Thumb for WebKit/Blink browsers. */
input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 20px;
  height: 20px;
  border-radius: 50%;
  background: #667eea;
  cursor: pointer;
}

/* Thumb for Firefox. */
input[type="range"]::-moz-range-thumb {
  width: 20px;
  height: 20px;
  border-radius: 50%;
  background: #667eea;
  cursor: pointer;
}
/* Full-width primary action button. */
.btn {
  width: 100%;
  padding: 15px;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  color: white;
  border: none;
  border-radius: 10px;
  font-size: 1.2em;
  font-weight: 600;
  cursor: pointer;
  transition: transform 0.2s, box-shadow 0.2s;
}

/* Lift-and-glow hover effect. */
.btn:hover {
  transform: translateY(-2px);
  box-shadow: 0 10px 20px rgba(102, 126, 234, 0.4);
}

/* Disabled state. This rule has the same specificity as .btn:hover and comes
   later, so it must reset every property the hover rule sets; otherwise a
   disabled button still picks up the hover glow. */
.btn:disabled {
  background: #ccc;
  cursor: not-allowed;
  transform: none;
  box-shadow: none;
}
/* Busy indicator; hidden by default and toggled from the page script. */
.loading {
  margin: 20px 0;
  text-align: center;
  display: none;
}

/* Ring spinner: a light ring whose top edge carries the accent color. */
.spinner {
  width: 40px;
  height: 40px;
  margin: 0 auto;
  border: 4px solid #f3f3f3;
  border-top-color: #667eea; /* must follow `border` to recolor the top edge */
  border-radius: 50%;
  animation: spin 1s linear infinite;
}

@keyframes spin {
  from { transform: rotate(0deg); }
  to { transform: rotate(360deg); }
}

/* Result panel; hidden by default and toggled from the page script. */
.result {
  margin-top: 30px;
  padding: 20px;
  border-radius: 10px;
  background: #f8f9fa;
  display: none;
}

.result h3 {
  color: #333;
  margin-bottom: 15px;
}

/* Audio player spans the panel width. */
audio {
  margin-bottom: 15px;
  width: 100%;
}

/* Spectrogram image scales to the panel width. */
.spectrogram {
  margin-top: 15px;
  width: 100%;
  border-radius: 10px;
}
/* Error banner; hidden by default and toggled from the page script. */
.error {
  margin-top: 20px;
  padding: 15px;
  border-left: 4px solid #ff4444;
  border-radius: 5px;
  background: #ffeeee;
  color: #cc0000;
  display: none;
}

/* Amber notice box used for the model limitations list. */
.info-box {
  margin-top: 30px;
  padding: 15px;
  border-left: 4px solid #ffc107;
  border-radius: 5px;
  background: #fff3cd;
}

.info-box h4 {
  color: #856404;
  margin-bottom: 10px;
}

/* Indent the list so the bullets sit inside the box. */
.info-box ul {
  margin-left: 20px;
}

.info-box li {
  color: #856404;
  margin-bottom: 5px;
}
/* API documentation panel. */
.api-docs {
  margin-top: 30px;
  padding: 20px;
  border-radius: 10px;
  background: #f8f9fa;
}

.api-docs h3 {
  color: #333;
  margin-bottom: 15px;
}

/* Dark code samples; long lines scroll horizontally instead of wrapping. */
.api-docs pre {
  padding: 15px;
  overflow-x: auto;
  border-radius: 5px;
  font-size: 0.9em;
  color: #f8f8f2;
  background: #2d2d2d;
}

.api-docs code {
  font-family: 'Courier New', monospace;
}
</style>
</head>
<body>
<div class="container">
<!-- Page banner: product name and a one-line description. -->
<div class="header">
  <h1>🎤 F5-TTS Vietnamese</h1>
  <p>Text-to-Speech Synthesis • Trained on ~1000 hours of data</p>
</div>
<div class="content">
<!-- Synthesis form. The page script intercepts submit and posts the fields
     (ref_audio, gen_text, speed) with fetch(), so no action/method is set. -->
<form id="ttsForm">
  <div class="form-group">
    <!-- Both labels point at the file input: this caption names the field,
         and the styled label below acts as the visible upload button. -->
    <label for="refAudio">🔊 Sample Voice (Audio Reference)</label>
    <div class="file-input-wrapper">
      <!-- The input must stay immediately before its styled label. -->
      <input type="file" id="refAudio" name="ref_audio" accept="audio/*" required>
      <label for="refAudio" class="file-input-label">
        📁 Click to upload audio file
      </label>
    </div>
    <!-- Filled by the page script with the selected file's name. -->
    <div class="file-name" id="fileName"></div>
  </div>
  <div class="form-group">
    <label for="genText">📝 Text to Synthesize</label>
    <textarea id="genText" name="gen_text" placeholder="Nhập văn bản tiếng Việt để tạo giọng nói..." required></textarea>
  </div>
  <div class="slider-group">
    <div class="slider-label">
      <!-- Associated with the range input so assistive technology announces
           the slider's name. -->
      <label for="speed">⚡ Speed</label>
      <!-- Live readout of the slider value, updated by the page script. -->
      <span id="speedValue">1.0x</span>
    </div>
    <input type="range" id="speed" name="speed" min="0.3" max="2.0" step="0.1" value="1.0">
  </div>
  <button type="submit" class="btn" id="submitBtn">
    🔥 Generate Speech
  </button>
</form>
<!-- Busy indicator, shown by the page script while a request is in flight.
     role="status" lets assistive technology announce it politely. -->
<div class="loading" id="loading" role="status">
  <div class="spinner"></div>
  <p style="margin-top: 15px; color: #666;">Generating speech... Please wait...</p>
</div>
<!-- Error message slot, filled and shown by the page script.
     role="alert" makes the message announced as soon as it appears. -->
<div class="error" id="error" role="alert"></div>
<!-- Result panel, shown by the page script after a successful request.
     The audio and image sources are assigned at that point. -->
<div class="result" id="result">
  <h3>🎧 Generated Audio</h3>
  <audio id="audioPlayer" controls></audio>
  <h3>📊 Spectrogram</h3>
  <img id="spectrogram" class="spectrogram" alt="Spectrogram">
</div>
<!-- Static notice listing known limitations of the model. -->
<div class="info-box">
  <h4>❗ Model Limitations</h4>
  <ul>
    <li>May not perform well with numbers, dates, and special characters</li>
    <li>Rhythm may be inconsistent with some texts</li>
    <li>Works best with clear, well-pronounced reference audio</li>
    <li>Maximum 1000 words per request</li>
  </ul>
</div>
<!-- Static API reference. Text inside <pre> is rendered verbatim, so those
     lines are deliberately not indented with the surrounding markup. -->
<div class="api-docs">
  <h3>📡 API Documentation</h3>
  <p style="margin-bottom: 15px;">Use the following endpoint to integrate with your application:</p>
  <h4>POST /api/synthesize</h4>
  <!-- Field names match the form controls on this page. The "@" prefix tells
       curl to upload the named file; replace reference.wav with your own. -->
  <pre><code>curl -X POST http://localhost:5000/api/synthesize \
  -F "ref_audio=@reference.wav" \
  -F "gen_text=Xin chào, đây là giọng nói tổng hợp" \
  -F "speed=1.0"</code></pre>
  <h4 style="margin-top: 20px;">Response:</h4>
  <pre><code>{
  "success": true,
  "audio": "base64_encoded_audio_data",
  "spectrogram": "base64_encoded_image_data",
  "sample_rate": 24000,
  "message": "Speech synthesized successfully"
}</code></pre>
  <h4 style="margin-top: 20px;">GET /api/health</h4>
  <p style="margin-bottom: 10px;">Check if the service is running:</p>
  <pre><code>curl http://localhost:5000/api/health</code></pre>
  <h4 style="margin-top: 20px;">GET /api/info</h4>
  <p style="margin-bottom: 10px;">Get model information:</p>
  <pre><code>curl http://localhost:5000/api/info</code></pre>
</div>
</div>
</div>
<script>
// Look up, once, every element the handlers below read or update.
const byId = (id) => document.getElementById(id);

// Form and its inputs
const form = byId('ttsForm');
const refAudioInput = byId('refAudio');
const fileNameDiv = byId('fileName');
const speedSlider = byId('speed');
const speedValue = byId('speedValue');
const submitBtn = byId('submitBtn');

// Status and output areas
const loading = byId('loading');
const error = byId('error');
const result = byId('result');
const audioPlayer = byId('audioPlayer');
const spectrogram = byId('spectrogram');
// Update file name display.
// Shows the selected file's name, and clears the line when the selection
// becomes empty so a stale name is never shown for a file that is no longer
// selected.
refAudioInput.addEventListener('change', function(e) {
    const files = e.target.files;
    fileNameDiv.textContent = files.length > 0 ? '✅ ' + files[0].name : '';
});
// Update speed value display.
// The slider reports whole numbers without a decimal part (e.g. "1"), while
// the initial readout in the markup is "1.0x"; format to one decimal so the
// readout keeps the same shape for every step.
speedSlider.addEventListener('input', function(e) {
    speedValue.textContent = Number(e.target.value).toFixed(1) + 'x';
});
// Handle form submission: POST the form fields to /api/synthesize, then show
// the returned audio and spectrogram, or an error message on failure.
form.addEventListener('submit', async function(e) {
    // Stay on the page; the request is sent with fetch() below.
    e.preventDefault();

    // Hide previous results and errors
    result.style.display = 'none';
    error.style.display = 'none';

    // Show loading and block repeat submissions while the request is in flight
    loading.style.display = 'block';
    submitBtn.disabled = true;

    try {
        const formData = new FormData(form);
        const response = await fetch('/api/synthesize', {
            method: 'POST',
            body: formData
        });

        // The body is not guaranteed to be JSON (for example an HTML error
        // page). Report that in plain terms instead of surfacing a raw JSON
        // parse error to the user.
        let data;
        try {
            data = await response.json();
        } catch (parseErr) {
            throw new Error(response.ok
                ? 'Server returned an invalid response'
                : 'Request failed (HTTP ' + response.status + ')');
        }

        // `data` may legitimately parse to null, so guard before reading it.
        if (!(response.ok && data && data.success)) {
            throw new Error((data && data.error) || 'Unknown error occurred');
        }

        // Display audio
        const audioBlob = base64ToBlob(data.audio, 'audio/wav');
        const previousUrl = audioPlayer.src;
        audioPlayer.src = URL.createObjectURL(audioBlob);

        // Object URLs keep their Blob alive until revoked; release the one
        // from the previous run so repeated generations do not pile up.
        if (previousUrl.startsWith('blob:')) {
            URL.revokeObjectURL(previousUrl);
        }

        // Display spectrogram
        spectrogram.src = 'data:image/png;base64,' + data.spectrogram;
        result.style.display = 'block';
    } catch (err) {
        // Network failures, bad responses and decoding errors all end up here.
        error.textContent = '❌ ' + err.message;
        error.style.display = 'block';
    } finally {
        // Always restore the idle UI state, whether the request succeeded or not.
        loading.style.display = 'none';
        submitBtn.disabled = false;
    }
});
// Decode a base64 string into a Blob with the given MIME type.
//   base64   - base64-encoded binary payload
//   mimeType - value stored as the Blob's `type`
// Returns a Blob holding the decoded bytes. atob() throws if the input is
// not valid base64.
function base64ToBlob(base64, mimeType) {
    // atob() yields a "binary string": one character per decoded byte.
    const binary = atob(base64);
    const bytes = new Uint8Array(binary.length);
    for (let idx = 0; idx < bytes.length; idx += 1) {
        bytes[idx] = binary.charCodeAt(idx);
    }
    return new Blob([bytes], { type: mimeType });
}
</script>
</body>
</html>