Spaces:
Sleeping
Sleeping
| """ | |
| Kokoro TTS Service - High-quality, local, commercially-free text-to-speech. | |
| Uses the Kokoro TTS model (82M params, Apache 2.0 license). | |
| """ | |
| import os | |
| import hashlib | |
| import logging | |
| from pathlib import Path | |
| from typing import Optional | |
logger = logging.getLogger(__name__)

# Third ancestor directory of this file; the default audio cache lives under it.
BASE_DIR = Path(__file__).parent.parent.parent

# Catalogue of supported Kokoro voices, keyed by voice ID.
# Each entry carries a display name, the Kokoro language code
# ("a" for the American English voices, "b" for the British English ones)
# and a short human-readable description.
KOKORO_VOICES = {
    # --- American English ---
    "af_heart": {
        "name": "Heart (Female)",
        "lang": "a",
        "description": "Warm, friendly American female",
    },
    "af_bella": {
        "name": "Bella (Female)",
        "lang": "a",
        "description": "Professional American female",
    },
    "af_nicole": {
        "name": "Nicole (Female)",
        "lang": "a",
        "description": "Clear American female",
    },
    "af_sarah": {
        "name": "Sarah (Female)",
        "lang": "a",
        "description": "Natural American female",
    },
    "af_sky": {
        "name": "Sky (Female)",
        "lang": "a",
        "description": "Bright American female",
    },
    "am_adam": {
        "name": "Adam (Male)",
        "lang": "a",
        "description": "Professional American male",
    },
    "am_michael": {
        "name": "Michael (Male)",
        "lang": "a",
        "description": "Deep American male",
    },
    # --- British English ---
    "bf_emma": {
        "name": "Emma (British Female)",
        "lang": "b",
        "description": "Professional British female",
    },
    "bf_isabella": {
        "name": "Isabella (British Female)",
        "lang": "b",
        "description": "Warm British female",
    },
    "bm_george": {
        "name": "George (British Male)",
        "lang": "b",
        "description": "Professional British male",
    },
    "bm_lewis": {
        "name": "Lewis (British Male)",
        "lang": "b",
        "description": "Friendly British male",
    },
}

# Voice used when none is requested or the requested ID is unknown.
DEFAULT_VOICE = "af_heart"
class KokoroTTSService:
    """
    Text-to-speech service using Kokoro TTS.

    High-quality, local, commercially-free (Apache 2.0 license).

    The Kokoro pipeline is loaded lazily on the first synthesis request.
    Generated audio is cached on disk in ``cache_dir`` under a file name
    derived from the MD5 of the text and the voice ID, so repeated requests
    for the same text/voice pair reuse the existing MP3.
    """

    # Sample rate used when writing synthesized audio (Kokoro outputs 24kHz).
    SAMPLE_RATE = 24000

    def __init__(self, voice: str = DEFAULT_VOICE, cache_dir: Optional[Path] = None):
        """
        Initialize Kokoro TTS service.

        Args:
            voice: Voice ID (e.g., 'af_heart', 'am_adam'). Unknown IDs fall
                back to ``DEFAULT_VOICE`` (a warning is logged).
            cache_dir: Optional custom cache directory. Defaults to
                ``BASE_DIR / "media" / "voiceover" / "kokoro"``. The
                directory is created if it does not exist.
        """
        if voice not in KOKORO_VOICES:
            # Keep the lenient fallback, but make it visible in the logs.
            logger.warning(f"Unknown Kokoro voice '{voice}', falling back to '{DEFAULT_VOICE}'")
            voice = DEFAULT_VOICE
        self.voice = voice
        self.voice_info = KOKORO_VOICES[self.voice]
        self.lang_code = self.voice_info["lang"]

        # Initialize cache directory
        if cache_dir:
            self.cache_dir = Path(cache_dir)
        else:
            self.cache_dir = BASE_DIR / "media" / "voiceover" / "kokoro"
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Lazy-load pipeline to avoid loading model until needed
        self._pipeline = None
        logger.info(f"KokoroTTS initialized with voice: {self.voice} ({self.voice_info['name']})")

    def _get_pipeline(self):
        """
        Lazy-load the Kokoro pipeline.

        Returns:
            The ``KPipeline`` instance for this service's language code,
            created on first use and reused afterwards.

        Raises:
            RuntimeError: If the ``kokoro`` package cannot be imported.
        """
        if self._pipeline is None:
            try:
                from kokoro import KPipeline

                self._pipeline = KPipeline(lang_code=self.lang_code)
                logger.info(f"Kokoro pipeline loaded for language: {self.lang_code}")
            except ImportError as e:
                logger.error("Kokoro not installed. Install with: pip install kokoro>=0.9.4 soundfile")
                raise RuntimeError("Kokoro TTS not installed") from e
        return self._pipeline

    def _synthesize(self, text: str):
        """Run the Kokoro pipeline on ``text`` and return one concatenated audio array."""
        import numpy as np

        pipeline = self._get_pipeline()
        # The pipeline yields 3-tuples; only the last element (the audio
        # chunk) is needed here.
        audio_chunks = [audio for _, _, audio in pipeline(text, voice=self.voice, speed=1.0)]
        if not audio_chunks:
            raise RuntimeError("No audio generated")
        return np.concatenate(audio_chunks)

    def _convert_to_mp3(self, wav_path: Path, mp3_path: Path) -> bool:
        """
        Convert ``wav_path`` to ``mp3_path`` with ffmpeg.

        Returns:
            True only if ffmpeg ran, exited with status 0 and left a
            non-empty MP3. On any failure a partial/empty MP3 is removed so
            it can never be mistaken for a valid cache entry later.
        """
        import subprocess

        try:
            result = subprocess.run(
                [
                    "ffmpeg", "-y", "-i", str(wav_path),
                    "-acodec", "libmp3lame", "-ab", "192k",
                    str(mp3_path),
                ],
                capture_output=True,
                text=True,
                errors="replace",  # odd bytes in ffmpeg output must not abort the call
            )
        except OSError as e:
            # Typically FileNotFoundError: ffmpeg is not installed / not on PATH.
            logger.warning(f"ffmpeg could not be executed: {e}")
            converted = False
        else:
            converted = (
                result.returncode == 0
                and mp3_path.exists()
                and mp3_path.stat().st_size > 0
            )

        if not converted and mp3_path.exists():
            mp3_path.unlink()
        return converted

    def generate_from_text(self, text: str, **kwargs) -> Path:
        """
        Generate audio from text and return the path to the audio file.

        Args:
            text: Text to convert to speech
            **kwargs: Additional arguments (ignored for compatibility)

        Returns:
            Path to the generated audio file. This is an MP3 when ffmpeg is
            available and the conversion succeeds; otherwise the
            intermediate WAV file is returned as a fallback.

        Raises:
            ValueError: If ``text`` is empty or whitespace-only.
            RuntimeError: If synthesis or writing the audio fails for any
                other reason (the original error is chained as the cause).
        """
        if not text or not text.strip():
            raise ValueError("Text cannot be empty")

        # Create cache key based on text and voice
        content_hash = hashlib.md5(f"{text}-{self.voice}".encode("utf-8")).hexdigest()
        output_path = self.cache_dir / f"{content_hash}.mp3"
        wav_path = self.cache_dir / f"{content_hash}.wav"

        # Return cached file if exists
        if output_path.exists() and output_path.stat().st_size > 0:
            logger.info(f"Using cached Kokoro voiceover: {output_path.name}")
            return output_path

        logger.info(f"Generating Kokoro TTS ({self.voice}) for: {text[:50]}...")
        try:
            # Imported before synthesis so a missing dependency is reported
            # without first loading the model.
            import soundfile as sf

            full_audio = self._synthesize(text)

            # Save as WAV first (Kokoro outputs 24kHz)
            sf.write(str(wav_path), full_audio, self.SAMPLE_RATE)

            # Convert to MP3 using ffmpeg for smaller file size
            if self._convert_to_mp3(wav_path, output_path):
                # The WAV is only an intermediate once the MP3 is known good.
                if wav_path.exists():
                    wav_path.unlink()
                logger.info(f"✅ Kokoro TTS saved: {output_path} ({output_path.stat().st_size} bytes)")
                return output_path

            # Fallback: the WAV written above is still on disk, use it as-is.
            logger.warning("MP3 conversion failed, using WAV")
            return wav_path
        except Exception as e:
            logger.error(f"Kokoro TTS failed: {e}")
            raise RuntimeError(f"Kokoro TTS generation failed: {e}") from e
# Backward-compatibility alias: code that still refers to the
# SimpleElevenLabsService name gets KokoroTTSService instead.
# NOTE(review): presumably left over from an earlier ElevenLabs-backed
# implementation — confirm whether any caller still needs this name.
SimpleElevenLabsService = KokoroTTSService
def get_available_voices() -> dict:
    """
    Return the voice catalogue, keyed by Kokoro voice ID.

    The returned object is the module-level ``KOKORO_VOICES`` mapping itself
    (not a copy), so callers should treat it as read-only.
    """
    catalogue = KOKORO_VOICES
    return catalogue
def get_voice_names() -> list:
    """
    Return ``(voice_id, display_name)`` pairs for every known voice, for UI use.

    Pairs are listed in the iteration order of ``KOKORO_VOICES``.
    """
    return [
        (voice_id, KOKORO_VOICES[voice_id]["name"])
        for voice_id in KOKORO_VOICES
    ]