Spaces:

aditya2001
/

VidSimplify

Sleeping

App Files Community

Adityahulk committed 15 days ago

Commit

0e6884d

1 Parent(s): 0ed929a

Fix voiceover duration fallback and run Edge TTS via subprocess CLI

Browse files

Files changed (2) hide show

manimator/scene/voiceover_scene.py +18 -20
manimator/services/voiceover.py +37 -27

manimator/scene/voiceover_scene.py CHANGED Viewed

@@ -55,38 +55,36 @@ class VoiceoverScene(Scene):
         # Add audio to scene - use absolute path string
         self.add_sound(str(absolute_audio_path))
-        # Calculate duration (approximate or exact if we could read metadata)
-        # For now, we rely on Manim's add_sound to handle playback.
-        # But we need to know how long to wait.
-        # We need to get the duration of the audio file.
-        # Since we want to avoid heavy dependencies like pydub/sox if possible,
-        # we can try a lightweight approach or just use mutagen if available.
-        # Given the environment has 'manim', it likely has tools to read audio duration.
-        duration = self._get_audio_duration(absolute_audio_path)
         return _VoiceoverContext(self, duration)
-    def _get_audio_duration(self, file_path: Path) -> float:
         """
         Get duration of mp3 file.
-        Uses mutagen if available (installed by manim-voiceover), otherwise estimates.
         """
         try:
             from mutagen.mp3 import MP3
             audio = MP3(file_path)
-            return audio.info.length
         except ImportError:
-            logger.warning("mutagen not found, estimating duration based on file size")
-            # Rough estimate: 1MB ~ 1 minute for 128kbps mp3
-            # This is a fallback and might be inaccurate
-            size_bytes = file_path.stat().st_size
-            # 128 kbps = 16 KB/s
-            return size_bytes / 16000.0
         except Exception as e:
             logger.error(f"Error reading audio duration: {e}")
-            return 2.0 # Safe default fallback
 class _VoiceoverContext:
     """Context manager helper"""

         # Add audio to scene - use absolute path string
         self.add_sound(str(absolute_audio_path))
+        # Get duration - pass text for fallback estimation if audio reading fails
+        duration = self._get_audio_duration(absolute_audio_path, text)
         return _VoiceoverContext(self, duration)
+    def _get_audio_duration(self, file_path: Path, text: str = "") -> float:
         """
         Get duration of mp3 file.
+        Uses mutagen if available, otherwise estimates from file size or text length.
         """
+        min_duration = max(2.0, len(text) / 15.0) if text else 2.0  # Minimum based on text length
         try:
             from mutagen.mp3 import MP3
             audio = MP3(file_path)
+            duration = audio.info.length
+            if duration > 0.5:
+                return duration
+            # If mutagen returns too short, use minimum
+            logger.warning(f"Mutagen returned short duration ({duration}s), using minimum")
+            return min_duration
         except ImportError:
+            logger.warning("mutagen not found, estimating duration")
+            size_bytes = file_path.stat().st_size if file_path.exists() else 0
+            if size_bytes > 1000:
+                return max(min_duration, size_bytes / 16000.0)
+            return min_duration
         except Exception as e:
             logger.error(f"Error reading audio duration: {e}")
+            return min_duration
 class _VoiceoverContext:
     """Context manager helper"""

manimator/services/voiceover.py CHANGED Viewed

@@ -123,12 +123,13 @@ class SimpleElevenLabsService:
     def _generate_with_edge_tts(self, text: str) -> Path:
         """
         Fallback generation using Microsoft Edge TTS (free, high quality).
-        Uses neural voices that sound natural and professional.
         """
         try:
-            import edge_tts
-            # Use absolute path for edge-tts cache (important for containerized environments)
             edge_cache_dir = BASE_DIR / "media" / "voiceover" / "edge_tts"
             edge_cache_dir.mkdir(parents=True, exist_ok=True)
@@ -138,36 +139,45 @@ class SimpleElevenLabsService:
             content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
             output_path = edge_cache_dir / f"{content_hash}.mp3"
-            if output_path.exists() and output_path.stat().st_size > 0:
                 logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
                 return output_path
-            logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:30]}...")
-            # Edge-tts is async, so we need to run it in an event loop
-            async def _generate():
-                communicate = edge_tts.Communicate(text, edge_voice)
-                await communicate.save(str(output_path))
-            # Run the async function
-            try:
-                loop = asyncio.get_event_loop()
-            except RuntimeError:
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-            loop.run_until_complete(_generate())
-            # Verify file was created successfully
-            if output_path.exists() and output_path.stat().st_size > 0:
-                logger.info(f"✅ Edge TTS voiceover saved: {output_path} ({output_path.stat().st_size} bytes)")
             else:
-                raise Exception("Edge TTS audio file was not created or is empty")
-            return output_path
         except Exception as e:
-            logger.error(f"Edge TTS failed: {str(e)}. Falling back to gTTS.")
             return self._generate_with_gtts(text)
     def _generate_with_gtts(self, text: str) -> Path:

     def _generate_with_edge_tts(self, text: str) -> Path:
         """
         Fallback generation using Microsoft Edge TTS (free, high quality).
+        Uses 'python -m edge_tts' CLI to avoid asyncio conflicts.
         """
         try:
+            import subprocess
+            import sys
+            # Use absolute path for edge-tts cache
             edge_cache_dir = BASE_DIR / "media" / "voiceover" / "edge_tts"
             edge_cache_dir.mkdir(parents=True, exist_ok=True)
             content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
             output_path = edge_cache_dir / f"{content_hash}.mp3"
+            # Check cache
+            if output_path.exists() and output_path.stat().st_size > 1024:
                 logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
                 return output_path
+            logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:50]}...")
+            # Use python -m edge_tts - this is guaranteed to work since edge-tts is installed
+            cmd = [
+                sys.executable,  # Use the same Python interpreter
+                "-m", "edge_tts",
+                "--voice", edge_voice,
+                "--text", text,
+                "--write-media", str(output_path)
+            ]
+            logger.info(f"Running: {' '.join(cmd[:4])}...")
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=120,  # 2 minute timeout for network
+                cwd=str(BASE_DIR)  # Run from project root
+            )
+            if result.returncode != 0:
+                logger.error(f"Edge TTS failed: {result.stderr}")
+                raise Exception(f"Edge TTS failed: {result.stderr}")
+            # Verify file was created
+            if output_path.exists() and output_path.stat().st_size > 1024:
+                logger.info(f"✅ Edge TTS audio saved: {output_path} ({output_path.stat().st_size} bytes)")
+                return output_path
             else:
+                raise Exception("Edge TTS created empty or invalid file")
         except Exception as e:
+            logger.error(f"Edge TTS failed: {e}. Falling back to gTTS.")
             return self._generate_with_gtts(text)
     def _generate_with_gtts(self, text: str) -> Path: