Adityahulk committed on
Commit
1110dbd
·
1 Parent(s): 0e6884d

Revert "fucking random fix"

Browse files

This reverts commit 0e6884df187e3d5ad1bf0959dfc68457070be67b.

manimator/scene/voiceover_scene.py CHANGED
@@ -55,36 +55,38 @@ class VoiceoverScene(Scene):
55
  # Add audio to scene - use absolute path string
56
  self.add_sound(str(absolute_audio_path))
57
 
58
- # Get duration - pass text for fallback estimation if audio reading fails
59
- duration = self._get_audio_duration(absolute_audio_path, text)
 
 
 
 
 
 
 
 
60
 
61
  return _VoiceoverContext(self, duration)
62
 
63
- def _get_audio_duration(self, file_path: Path, text: str = "") -> float:
64
  """
65
  Get duration of mp3 file.
66
- Uses mutagen if available, otherwise estimates from file size or text length.
67
  """
68
- min_duration = max(2.0, len(text) / 15.0) if text else 2.0 # Minimum based on text length
69
-
70
  try:
71
  from mutagen.mp3 import MP3
72
  audio = MP3(file_path)
73
- duration = audio.info.length
74
- if duration > 0.5:
75
- return duration
76
- # If mutagen returns too short, use minimum
77
- logger.warning(f"Mutagen returned short duration ({duration}s), using minimum")
78
- return min_duration
79
  except ImportError:
80
- logger.warning("mutagen not found, estimating duration")
81
- size_bytes = file_path.stat().st_size if file_path.exists() else 0
82
- if size_bytes > 1000:
83
- return max(min_duration, size_bytes / 16000.0)
84
- return min_duration
 
85
  except Exception as e:
86
  logger.error(f"Error reading audio duration: {e}")
87
- return min_duration
88
 
89
  class _VoiceoverContext:
90
  """Context manager helper"""
 
55
  # Add audio to scene - use absolute path string
56
  self.add_sound(str(absolute_audio_path))
57
 
58
+ # Calculate duration (approximate or exact if we could read metadata)
59
+ # For now, we rely on Manim's add_sound to handle playback.
60
+ # But we need to know how long to wait.
61
+
62
+ # We need to get the duration of the audio file.
63
+ # Since we want to avoid heavy dependencies like pydub/sox if possible,
64
+ # we can try a lightweight approach or just use mutagen if available.
65
+ # Given the environment has 'manim', it likely has tools to read audio duration.
66
+
67
+ duration = self._get_audio_duration(absolute_audio_path)
68
 
69
  return _VoiceoverContext(self, duration)
70
 
71
+ def _get_audio_duration(self, file_path: Path) -> float:
72
  """
73
  Get duration of mp3 file.
74
+ Uses mutagen if available (installed by manim-voiceover), otherwise estimates.
75
  """
 
 
76
  try:
77
  from mutagen.mp3 import MP3
78
  audio = MP3(file_path)
79
+ return audio.info.length
 
 
 
 
 
80
  except ImportError:
81
+ logger.warning("mutagen not found, estimating duration based on file size")
82
+ # Rough estimate: 1MB ~ 1 minute for 128kbps mp3
83
+ # This is a fallback and might be inaccurate
84
+ size_bytes = file_path.stat().st_size
85
+ # 128 kbps = 16 KB/s
86
+ return size_bytes / 16000.0
87
  except Exception as e:
88
  logger.error(f"Error reading audio duration: {e}")
89
+ return 2.0 # Safe default fallback
90
 
91
  class _VoiceoverContext:
92
  """Context manager helper"""
manimator/services/voiceover.py CHANGED
@@ -123,13 +123,12 @@ class SimpleElevenLabsService:
123
  def _generate_with_edge_tts(self, text: str) -> Path:
124
  """
125
  Fallback generation using Microsoft Edge TTS (free, high quality).
126
- Uses 'python -m edge_tts' CLI to avoid asyncio conflicts.
127
  """
128
  try:
129
- import subprocess
130
- import sys
131
 
132
- # Use absolute path for edge-tts cache
133
  edge_cache_dir = BASE_DIR / "media" / "voiceover" / "edge_tts"
134
  edge_cache_dir.mkdir(parents=True, exist_ok=True)
135
 
@@ -139,45 +138,36 @@ class SimpleElevenLabsService:
139
  content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
140
  output_path = edge_cache_dir / f"{content_hash}.mp3"
141
 
142
- # Check cache
143
- if output_path.exists() and output_path.stat().st_size > 1024:
144
  logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
145
  return output_path
146
 
147
- logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:50]}...")
148
-
149
- # Use python -m edge_tts - this is guaranteed to work since edge-tts is installed
150
- cmd = [
151
- sys.executable, # Use the same Python interpreter
152
- "-m", "edge_tts",
153
- "--voice", edge_voice,
154
- "--text", text,
155
- "--write-media", str(output_path)
156
- ]
157
-
158
- logger.info(f"Running: {' '.join(cmd[:4])}...")
159
-
160
- result = subprocess.run(
161
- cmd,
162
- capture_output=True,
163
- text=True,
164
- timeout=120, # 2 minute timeout for network
165
- cwd=str(BASE_DIR) # Run from project root
166
- )
167
-
168
- if result.returncode != 0:
169
- logger.error(f"Edge TTS failed: {result.stderr}")
170
- raise Exception(f"Edge TTS failed: {result.stderr}")
171
-
172
- # Verify file was created
173
- if output_path.exists() and output_path.stat().st_size > 1024:
174
- logger.info(f"✅ Edge TTS audio saved: {output_path} ({output_path.stat().st_size} bytes)")
175
- return output_path
176
  else:
177
- raise Exception("Edge TTS created empty or invalid file")
178
 
 
 
179
  except Exception as e:
180
- logger.error(f"Edge TTS failed: {e}. Falling back to gTTS.")
181
  return self._generate_with_gtts(text)
182
 
183
  def _generate_with_gtts(self, text: str) -> Path:
 
123
  def _generate_with_edge_tts(self, text: str) -> Path:
124
  """
125
  Fallback generation using Microsoft Edge TTS (free, high quality).
126
+ Uses neural voices that sound natural and professional.
127
  """
128
  try:
129
+ import edge_tts
 
130
 
131
+ # Use absolute path for edge-tts cache (important for containerized environments)
132
  edge_cache_dir = BASE_DIR / "media" / "voiceover" / "edge_tts"
133
  edge_cache_dir.mkdir(parents=True, exist_ok=True)
134
 
 
138
  content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
139
  output_path = edge_cache_dir / f"{content_hash}.mp3"
140
 
141
+ if output_path.exists() and output_path.stat().st_size > 0:
 
142
  logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
143
  return output_path
144
 
145
+ logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:30]}...")
146
+
147
+ # Edge-tts is async, so we need to run it in an event loop
148
+ async def _generate():
149
+ communicate = edge_tts.Communicate(text, edge_voice)
150
+ await communicate.save(str(output_path))
151
+
152
+ # Run the async function
153
+ try:
154
+ loop = asyncio.get_event_loop()
155
+ except RuntimeError:
156
+ loop = asyncio.new_event_loop()
157
+ asyncio.set_event_loop(loop)
158
+
159
+ loop.run_until_complete(_generate())
160
+
161
+ # Verify file was created successfully
162
+ if output_path.exists() and output_path.stat().st_size > 0:
163
+ logger.info(f"✅ Edge TTS voiceover saved: {output_path} ({output_path.stat().st_size} bytes)")
 
 
 
 
 
 
 
 
 
 
164
  else:
165
+ raise Exception("Edge TTS audio file was not created or is empty")
166
 
167
+ return output_path
168
+
169
  except Exception as e:
170
+ logger.error(f"Edge TTS failed: {str(e)}. Falling back to gTTS.")
171
  return self._generate_with_gtts(text)
172
 
173
  def _generate_with_gtts(self, text: str) -> Path: