Spaces:

aditya2001
/

VidSimplify

Running

App Files Community

Adityahulk committed 14 days ago

Commit

1110dbd

1 Parent(s): 0e6884d

Revert "fucking random fix"

Browse files

This reverts commit 0e6884df187e3d5ad1bf0959dfc68457070be67b.

Files changed (2) hide show

manimator/scene/voiceover_scene.py +20 -18
manimator/services/voiceover.py +27 -37

manimator/scene/voiceover_scene.py CHANGED Viewed

@@ -55,36 +55,38 @@ class VoiceoverScene(Scene):
         # Add audio to scene - use absolute path string
         self.add_sound(str(absolute_audio_path))
-        # Get duration - pass text for fallback estimation if audio reading fails
-        duration = self._get_audio_duration(absolute_audio_path, text)
         return _VoiceoverContext(self, duration)
-    def _get_audio_duration(self, file_path: Path, text: str = "") -> float:
         """
         Get duration of mp3 file.
-        Uses mutagen if available, otherwise estimates from file size or text length.
         """
-        min_duration = max(2.0, len(text) / 15.0) if text else 2.0  # Minimum based on text length
         try:
             from mutagen.mp3 import MP3
             audio = MP3(file_path)
-            duration = audio.info.length
-            if duration > 0.5:
-                return duration
-            # If mutagen returns too short, use minimum
-            logger.warning(f"Mutagen returned short duration ({duration}s), using minimum")
-            return min_duration
         except ImportError:
-            logger.warning("mutagen not found, estimating duration")
-            size_bytes = file_path.stat().st_size if file_path.exists() else 0
-            if size_bytes > 1000:
-                return max(min_duration, size_bytes / 16000.0)
-            return min_duration
         except Exception as e:
             logger.error(f"Error reading audio duration: {e}")
-            return min_duration
 class _VoiceoverContext:
     """Context manager helper"""

         # Add audio to scene - use absolute path string
         self.add_sound(str(absolute_audio_path))
+        # Calculate duration (approximate or exact if we could read metadata)
+        # For now, we rely on Manim's add_sound to handle playback.
+        # But we need to know how long to wait.
+        # We need to get the duration of the audio file.
+        # Since we want to avoid heavy dependencies like pydub/sox if possible,
+        # we can try a lightweight approach or just use mutagen if available.
+        # Given the environment has 'manim', it likely has tools to read audio duration.
+        duration = self._get_audio_duration(absolute_audio_path)
         return _VoiceoverContext(self, duration)
+    def _get_audio_duration(self, file_path: Path) -> float:
         """
         Get duration of mp3 file.
+        Uses mutagen if available (installed by manim-voiceover), otherwise estimates.
         """
         try:
             from mutagen.mp3 import MP3
             audio = MP3(file_path)
+            return audio.info.length
         except ImportError:
+            logger.warning("mutagen not found, estimating duration based on file size")
+            # Rough estimate: 1MB ~ 1 minute for 128kbps mp3
+            # This is a fallback and might be inaccurate
+            size_bytes = file_path.stat().st_size
+            # 128 kbps = 16 KB/s
+            return size_bytes / 16000.0
         except Exception as e:
             logger.error(f"Error reading audio duration: {e}")
+            return 2.0 # Safe default fallback
 class _VoiceoverContext:
     """Context manager helper"""

manimator/services/voiceover.py CHANGED Viewed

@@ -123,13 +123,12 @@ class SimpleElevenLabsService:
     def _generate_with_edge_tts(self, text: str) -> Path:
         """
         Fallback generation using Microsoft Edge TTS (free, high quality).
-        Uses 'python -m edge_tts' CLI to avoid asyncio conflicts.
         """
         try:
-            import subprocess
-            import sys
-            # Use absolute path for edge-tts cache
             edge_cache_dir = BASE_DIR / "media" / "voiceover" / "edge_tts"
             edge_cache_dir.mkdir(parents=True, exist_ok=True)
@@ -139,45 +138,36 @@ class SimpleElevenLabsService:
             content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
             output_path = edge_cache_dir / f"{content_hash}.mp3"
-            # Check cache
-            if output_path.exists() and output_path.stat().st_size > 1024:
                 logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
                 return output_path
-            logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:50]}...")
-            # Use python -m edge_tts - this is guaranteed to work since edge-tts is installed
-            cmd = [
-                sys.executable,  # Use the same Python interpreter
-                "-m", "edge_tts",
-                "--voice", edge_voice,
-                "--text", text,
-                "--write-media", str(output_path)
-            ]
-            logger.info(f"Running: {' '.join(cmd[:4])}...")
-            result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=120,  # 2 minute timeout for network
-                cwd=str(BASE_DIR)  # Run from project root
-            )
-            if result.returncode != 0:
-                logger.error(f"Edge TTS failed: {result.stderr}")
-                raise Exception(f"Edge TTS failed: {result.stderr}")
-            # Verify file was created
-            if output_path.exists() and output_path.stat().st_size > 1024:
-                logger.info(f"✅ Edge TTS audio saved: {output_path} ({output_path.stat().st_size} bytes)")
-                return output_path
             else:
-                raise Exception("Edge TTS created empty or invalid file")
         except Exception as e:
-            logger.error(f"Edge TTS failed: {e}. Falling back to gTTS.")
             return self._generate_with_gtts(text)
     def _generate_with_gtts(self, text: str) -> Path:

     def _generate_with_edge_tts(self, text: str) -> Path:
         """
         Fallback generation using Microsoft Edge TTS (free, high quality).
+        Uses neural voices that sound natural and professional.
         """
         try:
+            import edge_tts
+            # Use absolute path for edge-tts cache (important for containerized environments)
             edge_cache_dir = BASE_DIR / "media" / "voiceover" / "edge_tts"
             edge_cache_dir.mkdir(parents=True, exist_ok=True)
             content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
             output_path = edge_cache_dir / f"{content_hash}.mp3"
+            if output_path.exists() and output_path.stat().st_size > 0:
                 logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
                 return output_path
+            logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:30]}...")
+            # Edge-tts is async, so we need to run it in an event loop
+            async def _generate():
+                communicate = edge_tts.Communicate(text, edge_voice)
+                await communicate.save(str(output_path))
+            # Run the async function
+            try:
+                loop = asyncio.get_event_loop()
+            except RuntimeError:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+            loop.run_until_complete(_generate())
+            # Verify file was created successfully
+            if output_path.exists() and output_path.stat().st_size > 0:
+                logger.info(f"✅ Edge TTS voiceover saved: {output_path} ({output_path.stat().st_size} bytes)")
             else:
+                raise Exception("Edge TTS audio file was not created or is empty")
+            return output_path
         except Exception as e:
+            logger.error(f"Edge TTS failed: {str(e)}. Falling back to gTTS.")
             return self._generate_with_gtts(text)
     def _generate_with_gtts(self, text: str) -> Path: