badrex committed on
Commit
fe978b0
·
verified ·
1 Parent(s): ac4b361

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -15
app.py CHANGED
@@ -22,32 +22,53 @@ transcriber = pipeline("automatic-speech-recognition", model=MODEL_ID)
22
  # #y /= np.max(np.abs(y))
23
  # return transcriber({"sampling_rate": sr, "raw": y})["text"]
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  @spaces.GPU
26
  def transcribe(audio):
27
  sr, y = audio
28
 
29
- # Convert stereo → mono
30
  if y.ndim > 1:
31
  y = np.mean(y, axis=1)
32
 
33
- # Ensure float32
34
  y = y.astype(np.float32)
35
 
36
- # Normalize to [-1, 1] if it's not already
37
- if np.max(np.abs(y)) > 1.0:
38
- y /= np.max(np.abs(y))
 
39
 
40
- examples = []
41
- examples_dir = "examples"
42
- if os.path.exists(examples_dir):
43
- for filename in os.listdir(examples_dir):
44
- if filename.endswith((".wav", ".mp3", ".ogg")):
45
- examples.append([os.path.join(examples_dir, filename)])
46
-
47
- print(f"Found {len(examples)} example files")
48
- else:
49
- print("Examples directory not found")
50
 
 
51
 
52
  demo = gr.Interface(
53
  fn=transcribe,
 
22
  # #y /= np.max(np.abs(y))
23
  # return transcriber({"sampling_rate": sr, "raw": y})["text"]
24
 
25
+ # @spaces.GPU
26
+ # def transcribe(audio):
27
+ # sr, y = audio
28
+
29
+ # # Convert stereo → mono
30
+ # if y.ndim > 1:
31
+ # y = np.mean(y, axis=1)
32
+
33
+ # # Ensure float32
34
+ # y = y.astype(np.float32)
35
+
36
+ # # Normalize to [-1, 1] if it's not already
37
+ # if np.max(np.abs(y)) > 1.0:
38
+ # y /= np.max(np.abs(y))
39
+
40
+ # examples = []
41
+ # examples_dir = "examples"
42
+ # if os.path.exists(examples_dir):
43
+ # for filename in os.listdir(examples_dir):
44
+ # if filename.endswith((".wav", ".mp3", ".ogg")):
45
+ # examples.append([os.path.join(examples_dir, filename)])
46
+
47
+ # print(f"Found {len(examples)} example files")
48
+ # else:
49
+ # print("Examples directory not found")
50
+
51
  @spaces.GPU
52
  def transcribe(audio):
53
  sr, y = audio
54
 
 
55
  if y.ndim > 1:
56
  y = np.mean(y, axis=1)
57
 
 
58
  y = y.astype(np.float32)
59
 
60
+ # normalize to [-1, 1]
61
+ max_val = np.max(np.abs(y))
62
+ if max_val > 0:
63
+ y /= max_val
64
 
65
+ target_sr = transcriber.model.config.sampling_rate if hasattr(transcriber.model, "config") else 16000
66
+ if sr != target_sr:
67
+ import librosa
68
+ y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
69
+ sr = target_sr
 
 
 
 
 
70
 
71
+ return transcriber({"sampling_rate": sr, "raw": y})["text"]
72
 
73
  demo = gr.Interface(
74
  fn=transcribe,