Spaces:

badrex
/

JASRv1.1

Running on Zero

badrex committed on Oct 8

Commit

fe978b0

verified ·

1 Parent(s): ac4b361

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,32 +22,53 @@ transcriber = pipeline("automatic-speech-recognition", model=MODEL_ID)
 #     #y /= np.max(np.abs(y))
 #     return transcriber({"sampling_rate": sr, "raw": y})["text"]
 @spaces.GPU
 def transcribe(audio):
     sr, y = audio
-    # Convert stereo → mono
     if y.ndim > 1:
         y = np.mean(y, axis=1)
-    # Ensure float32
     y = y.astype(np.float32)
-    # Normalize to [-1, 1] if it's not already
-    if np.max(np.abs(y)) > 1.0:
-        y /= np.max(np.abs(y))
-examples = []
-examples_dir = "examples"
-if os.path.exists(examples_dir):
-    for filename in os.listdir(examples_dir):
-        if filename.endswith((".wav", ".mp3", ".ogg")):
-            examples.append([os.path.join(examples_dir, filename)])
-    print(f"Found {len(examples)} example files")
-else:
-    print("Examples directory not found")
 demo = gr.Interface(
     fn=transcribe,

 #     #y /= np.max(np.abs(y))
 #     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+# @spaces.GPU
+# def transcribe(audio):
+#     sr, y = audio
+#     # Convert stereo → mono
+#     if y.ndim > 1:
+#         y = np.mean(y, axis=1)
+#     # Ensure float32
+#     y = y.astype(np.float32)
+#     # Normalize to [-1, 1] if it's not already
+#     if np.max(np.abs(y)) > 1.0:
+#         y /= np.max(np.abs(y))
+# examples = []
+# examples_dir = "examples"
+# if os.path.exists(examples_dir):
+#     for filename in os.listdir(examples_dir):
+#         if filename.endswith((".wav", ".mp3", ".ogg")):
+#             examples.append([os.path.join(examples_dir, filename)])
+#     print(f"Found {len(examples)} example files")
+# else:
+#     print("Examples directory not found")
 @spaces.GPU
 def transcribe(audio):
+    """Transcribe one audio input with the module-level ASR pipeline.
+
+    Args:
+        audio: A ``(sample_rate, samples)`` pair; ``samples`` is used as a
+            NumPy array. May be ``None`` (presumably when nothing was
+            recorded or uploaded -- confirm against the gr.Interface input).
+
+    Returns:
+        The ``"text"`` field of the pipeline output, or ``""`` when there is
+        no audio to transcribe.
+    """
+    if audio is None:
+        return ""
     sr, y = audio
+    # Collapse multi-channel audio to one channel by averaging over axis 1.
     if y.ndim > 1:
         y = np.mean(y, axis=1)
     y = y.astype(np.float32)
+    if y.size == 0:
+        # np.max raises on an empty array, and there is nothing to decode.
+        return ""
+    # Peak-normalize to [-1, 1]; all-zero (silent) input is left untouched.
+    max_val = np.max(np.abs(y))
+    if max_val > 0:
+        y /= max_val
+    # Read the expected rate from the pipeline's feature extractor. The model
+    # config is not a reliable place for it: `hasattr(model, "config")` is
+    # always true, so the old 16000 fallback could never be reached.
+    feature_extractor = getattr(transcriber, "feature_extractor", None)
+    target_sr = getattr(feature_extractor, "sampling_rate", None) or 16000
+    if sr != target_sr:
+        import librosa
+        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
+        sr = target_sr
+    return transcriber({"sampling_rate": sr, "raw": y})["text"]
 demo = gr.Interface(
     fn=transcribe,