Adityahulk committed on
Commit
0e6884d
·
1 Parent(s): 0ed929a

Fix voiceover duration fallback (estimate from text length) and run Edge TTS via its CLI in a subprocess

Browse files
manimator/scene/voiceover_scene.py CHANGED
@@ -55,38 +55,36 @@ class VoiceoverScene(Scene):
55
  # Add audio to scene - use absolute path string
56
  self.add_sound(str(absolute_audio_path))
57
 
58
- # Calculate duration (approximate or exact if we could read metadata)
59
- # For now, we rely on Manim's add_sound to handle playback.
60
- # But we need to know how long to wait.
61
-
62
- # We need to get the duration of the audio file.
63
- # Since we want to avoid heavy dependencies like pydub/sox if possible,
64
- # we can try a lightweight approach or just use mutagen if available.
65
- # Given the environment has 'manim', it likely has tools to read audio duration.
66
-
67
- duration = self._get_audio_duration(absolute_audio_path)
68
 
69
  return _VoiceoverContext(self, duration)
70
 
71
- def _get_audio_duration(self, file_path: Path) -> float:
72
  """
73
  Get duration of mp3 file.
74
- Uses mutagen if available (installed by manim-voiceover), otherwise estimates.
75
  """
 
 
76
  try:
77
  from mutagen.mp3 import MP3
78
  audio = MP3(file_path)
79
- return audio.info.length
 
 
 
 
 
80
  except ImportError:
81
- logger.warning("mutagen not found, estimating duration based on file size")
82
- # Rough estimate: 1MB ~ 1 minute for 128kbps mp3
83
- # This is a fallback and might be inaccurate
84
- size_bytes = file_path.stat().st_size
85
- # 128 kbps = 16 KB/s
86
- return size_bytes / 16000.0
87
  except Exception as e:
88
  logger.error(f"Error reading audio duration: {e}")
89
- return 2.0 # Safe default fallback
90
 
91
  class _VoiceoverContext:
92
  """Context manager helper"""
 
55
  # Add audio to scene - use absolute path string
56
  self.add_sound(str(absolute_audio_path))
57
 
58
+ # Get duration - pass text for fallback estimation if audio reading fails
59
+ duration = self._get_audio_duration(absolute_audio_path, text)
 
 
 
 
 
 
 
 
60
 
61
  return _VoiceoverContext(self, duration)
62
 
63
+ def _get_audio_duration(self, file_path: Path, text: str = "") -> float:
64
  """
65
  Get duration of mp3 file.
66
+ Uses mutagen if available, otherwise estimates from file size or text length.
67
  """
68
+ min_duration = max(2.0, len(text) / 15.0) if text else 2.0 # Minimum based on text length
69
+
70
  try:
71
  from mutagen.mp3 import MP3
72
  audio = MP3(file_path)
73
+ duration = audio.info.length
74
+ if duration > 0.5:
75
+ return duration
76
+ # If mutagen returns too short, use minimum
77
+ logger.warning(f"Mutagen returned short duration ({duration}s), using minimum")
78
+ return min_duration
79
  except ImportError:
80
+ logger.warning("mutagen not found, estimating duration")
81
+ size_bytes = file_path.stat().st_size if file_path.exists() else 0
82
+ if size_bytes > 1000:
83
+ return max(min_duration, size_bytes / 16000.0)
84
+ return min_duration
 
85
  except Exception as e:
86
  logger.error(f"Error reading audio duration: {e}")
87
+ return min_duration
88
 
89
  class _VoiceoverContext:
90
  """Context manager helper"""
manimator/services/voiceover.py CHANGED
@@ -123,12 +123,13 @@ class SimpleElevenLabsService:
123
  def _generate_with_edge_tts(self, text: str) -> Path:
124
  """
125
  Fallback generation using Microsoft Edge TTS (free, high quality).
126
- Uses neural voices that sound natural and professional.
127
  """
128
  try:
129
- import edge_tts
 
130
 
131
- # Use absolute path for edge-tts cache (important for containerized environments)
132
  edge_cache_dir = BASE_DIR / "media" / "voiceover" / "edge_tts"
133
  edge_cache_dir.mkdir(parents=True, exist_ok=True)
134
 
@@ -138,36 +139,45 @@ class SimpleElevenLabsService:
138
  content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
139
  output_path = edge_cache_dir / f"{content_hash}.mp3"
140
 
141
- if output_path.exists() and output_path.stat().st_size > 0:
 
142
  logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
143
  return output_path
144
 
145
- logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:30]}...")
146
-
147
- # Edge-tts is async, so we need to run it in an event loop
148
- async def _generate():
149
- communicate = edge_tts.Communicate(text, edge_voice)
150
- await communicate.save(str(output_path))
151
-
152
- # Run the async function
153
- try:
154
- loop = asyncio.get_event_loop()
155
- except RuntimeError:
156
- loop = asyncio.new_event_loop()
157
- asyncio.set_event_loop(loop)
158
-
159
- loop.run_until_complete(_generate())
160
-
161
- # Verify file was created successfully
162
- if output_path.exists() and output_path.stat().st_size > 0:
163
- logger.info(f"✅ Edge TTS voiceover saved: {output_path} ({output_path.stat().st_size} bytes)")
 
 
 
 
 
 
 
 
 
 
164
  else:
165
- raise Exception("Edge TTS audio file was not created or is empty")
166
 
167
- return output_path
168
-
169
  except Exception as e:
170
- logger.error(f"Edge TTS failed: {str(e)}. Falling back to gTTS.")
171
  return self._generate_with_gtts(text)
172
 
173
  def _generate_with_gtts(self, text: str) -> Path:
 
123
  def _generate_with_edge_tts(self, text: str) -> Path:
124
  """
125
  Fallback generation using Microsoft Edge TTS (free, high quality).
126
+ Uses 'python -m edge_tts' CLI to avoid asyncio conflicts.
127
  """
128
  try:
129
+ import subprocess
130
+ import sys
131
 
132
+ # Use absolute path for edge-tts cache
133
  edge_cache_dir = BASE_DIR / "media" / "voiceover" / "edge_tts"
134
  edge_cache_dir.mkdir(parents=True, exist_ok=True)
135
 
 
139
  content_hash = hashlib.md5(f"{text}-{edge_voice}".encode("utf-8")).hexdigest()
140
  output_path = edge_cache_dir / f"{content_hash}.mp3"
141
 
142
+ # Check cache
143
+ if output_path.exists() and output_path.stat().st_size > 1024:
144
  logger.info(f"Using cached Edge TTS voiceover for hash {content_hash}")
145
  return output_path
146
 
147
+ logger.info(f"Generating Edge TTS ({edge_voice}) for: {text[:50]}...")
148
+
149
+ # Run the edge_tts module as a subprocess of the current interpreter (requires edge-tts to be installed; failures fall back to gTTS)
150
+ cmd = [
151
+ sys.executable, # Use the same Python interpreter
152
+ "-m", "edge_tts",
153
+ "--voice", edge_voice,
154
+ "--text", text,
155
+ "--write-media", str(output_path)
156
+ ]
157
+
158
+ logger.info(f"Running: {' '.join(cmd[:4])}...")
159
+
160
+ result = subprocess.run(
161
+ cmd,
162
+ capture_output=True,
163
+ text=True,
164
+ timeout=120, # 2 minute timeout for network
165
+ cwd=str(BASE_DIR) # Run from project root
166
+ )
167
+
168
+ if result.returncode != 0:
169
+ logger.error(f"Edge TTS failed: {result.stderr}")
170
+ raise Exception(f"Edge TTS failed: {result.stderr}")
171
+
172
+ # Verify file was created
173
+ if output_path.exists() and output_path.stat().st_size > 1024:
174
+ logger.info(f"✅ Edge TTS audio saved: {output_path} ({output_path.stat().st_size} bytes)")
175
+ return output_path
176
  else:
177
+ raise Exception("Edge TTS created empty or invalid file")
178
 
 
 
179
  except Exception as e:
180
+ logger.error(f"Edge TTS failed: {e}. Falling back to gTTS.")
181
  return self._generate_with_gtts(text)
182
 
183
  def _generate_with_gtts(self, text: str) -> Path: