Spaces:
Running
Running
File size: 6,325 Bytes
efaf26b 9f65ebc efaf26b 9f65ebc 2630524 6d576a6 22e8728 efaf26b 6d576a6 efaf26b 6d576a6 aa19b68 2630524 6d576a6 2630524 df57e44 e4f57af 14becec e4f57af df57e44 e4f57af efaf26b 6d576a6 efaf26b 6d576a6 22e8728 efaf26b 6d576a6 efaf26b 6d576a6 22e8728 6d576a6 efaf26b 6d576a6 efaf26b e4f57af df57e44 efaf26b 6d576a6 efaf26b 6d576a6 efaf26b 2630524 efaf26b 6d576a6 df57e44 6d576a6 efaf26b 22e8728 df57e44 efaf26b e4f57af efaf26b df57e44 e4f57af 6d576a6 df57e44 6d576a6 df57e44 e4f57af 6d576a6 2630524 6d576a6 22e8728 6d576a6 efaf26b 6d576a6 e4f57af 6d576a6 e4f57af 6d576a6 e4f57af efaf26b df57e44 efaf26b df57e44 6d576a6 9f65ebc cd19dc0 14becec cd19dc0 efaf26b 2630524 6d576a6 2630524 6d576a6 efaf26b e4f57af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import os
import json
import mimetypes
import gradio as gr
import google.generativeai as genai
from pydub import AudioSegment
import uuid
import time
import threading
# -----------------------
# Канфігурацыя
# -----------------------
# The API key is read from the environment variable named "gemini"; the
# module refuses to load when it is missing or empty.
GEMINI_API_KEY = os.environ.get("gemini")
if not GEMINI_API_KEY:
    raise ValueError("Не знойдзены API ключ для Gemini.")

genai.configure(api_key=GEMINI_API_KEY)

# Sampling settings passed to the model; the response is requested as JSON.
generation_config = dict(
    temperature=0.35,
    top_p=0.95,
    top_k=64,
    max_output_tokens=65536,
    response_mime_type="application/json",
)

# The system instruction shows the model the fields expected for one
# transcript segment (start, end, text).
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    generation_config=generation_config,
    system_instruction=(
        "transcribe\n"
        "format example:\n"
        '"start": 858.37,\n'
        '"end": 859.56,\n'
        '"text": "Калі бяспечна, безумоўна."\n'
    ),
)
# -----------------------
# Прагрэс-анімацыя (фонавы паток)
# -----------------------
def progress_animation(status_callback, stop_event, interval=0.6):
    """Report an animated "transcription in progress" status until stopped.

    Cycles through a small set of hourglass frames, passing each one to
    ``status_callback`` until ``stop_event`` is set.

    Args:
        status_callback: Callable that receives one status string per frame.
        stop_event: ``threading.Event``; setting it ends the animation.
        interval: Seconds to pause between frames (default 0.6).
    """
    frames = ("⏳", "⏳.", "⏳..", "⏳...")
    while not stop_event.is_set():
        for frame in frames:
            if stop_event.is_set():
                return
            status_callback(f"Транскрыпцыя ідзе {frame}")
            # Wait on the event instead of sleeping so that setting the
            # event wakes the thread immediately rather than after a full
            # frame interval.
            stop_event.wait(interval)
# -----------------------
# Асноўныя функцыі
# -----------------------
def upload_to_gemini(path, status_callback):
    """Upload the file at ``path`` via ``genai.upload_file`` and return it.

    The MIME type is guessed from the file name with ``mimetypes`` (it may
    be ``None`` when the extension is unknown). ``status_callback`` is
    called once before and once after the upload.
    """
    guessed_type = mimetypes.guess_type(path)[0]
    status_callback("📤 Загружаем файл у Gemini...")
    uploaded = genai.upload_file(path, mime_type=guessed_type)
    status_callback("✅ Файл загружаны.")
    return uploaded
def transcribe_audio(audio_path, status_callback):
    """Upload an audio file, ask the model to transcribe it, parse the JSON.

    While the model request is running, a background thread feeds an
    animated progress message to ``status_callback``.

    Args:
        audio_path: Path of the audio file to transcribe.
        status_callback: Callable that receives human-readable status text.

    Returns:
        The object decoded from the model's JSON response on success
        (callers expect a list of segments), or a ``str`` describing the
        failure. Exceptions are not propagated; they are converted to an
        error string.
    """
    try:
        status_callback("🔍 Падрыхтоўка транскрыпцыі...")
        file_obj = upload_to_gemini(audio_path, status_callback)

        stop_event = threading.Event()
        animation = threading.Thread(
            target=progress_animation,
            args=(status_callback, stop_event),
            # Daemon so a stuck animation can never keep the process alive.
            daemon=True,
        )
        animation.start()
        try:
            chat = model.start_chat(history=[])
            response = chat.send_message(file_obj)
        finally:
            # Always stop the animation, including when the request raises;
            # otherwise the thread would keep emitting status forever.
            stop_event.set()
            animation.join()

        if not response.text:
            return "❌ Пусты адказ ад мадэлі."

        # Keep a copy of the raw response on disk for troubleshooting.
        with open("last_response.json", "w", encoding="utf-8") as f:
            f.write(response.text)

        status_callback("📥 Апрацоўка транскрыпцыі...")
        transcripts = json.loads(response.text)
        status_callback(f"✅ Гатова: {len(transcripts)} фрагментаў.")
        return transcripts
    except Exception as e:
        return f"Памылка: {e}"
def seconds_to_timestamp(sec: float) -> str:
    """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* being split into
    fields, so a fraction such as ``1.9996`` carries into the seconds field
    (``00:00:02,000``) instead of producing a four-digit millisecond field.
    Negative values are clamped to zero.

    Args:
        sec: Offset in seconds (anything accepted by ``float()``).

    Returns:
        The timestamp string.
    """
    total_ms = max(int(round(float(sec) * 1000)), 0)
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
def transcripts_to_srt(transcripts, filename="subtitles.srt"):
    """Render transcript segments as SRT text and write it to ``filename``.

    Each segment is read through its ``"start"``, ``"end"`` and ``"text"``
    keys. Cues are numbered from 1 and separated by a blank line.

    Returns:
        ``(srt_text, filename)`` on success, or ``(error_message, None)``
        when anything goes wrong while building or writing the file.
    """
    try:
        cues = []
        for number, segment in enumerate(transcripts, start=1):
            begin = seconds_to_timestamp(segment["start"])
            finish = seconds_to_timestamp(segment["end"])
            cues.append(f"{number}\n{begin} --> {finish}\n{segment['text']}\n")
        srt_text = "\n".join(cues)

        with open(filename, "w", encoding="utf-8") as out:
            out.write(srt_text)
    except Exception as e:
        return f"Памылка пры запісе SRT: {e}", None
    return srt_text, filename
def extract_audio_from_video(video_file, status_callback):
    """Export the audio of ``video_file`` to a new MP3 and return its path.

    The output file is written to the current working directory under a
    unique ``extracted_<hex>.mp3`` name. ``status_callback`` is notified
    before and after the export.
    """
    status_callback("🎞 Вылучаем аўдыё з відэа...")
    track = AudioSegment.from_file(video_file)
    output_path = "extracted_{}.mp3".format(uuid.uuid4().hex)
    track.export(output_path, format="mp3")
    status_callback("✅ Аўдыё вылучана.")
    return output_path
def process_audio(audio_path, status_callback):
    """Transcribe an audio file and convert the transcript to SRT.

    Args:
        audio_path: Path of the audio file.
        status_callback: Callable that receives human-readable status text.

    Returns:
        ``(srt_text, srt_filename)`` on success, or ``(error_message, None)``
        when transcription or SRT generation fails.
    """
    result = transcribe_audio(audio_path, status_callback)
    if not isinstance(result, list):
        message = str(result)
        # transcribe_audio already prefixes exception text; avoid producing
        # a doubled "Памылка: Памылка: ..." message.
        if not message.startswith("Памылка"):
            message = f"Памылка: {message}"
        status_callback(message)
        return message, None

    status_callback("📝 Канвертацыя ў SRT...")
    content, filename = transcripts_to_srt(result)
    if filename is None:
        # transcripts_to_srt signals failure with a None filename and puts
        # the error text in `content`; do not claim the file is ready.
        status_callback(content)
        return content, None

    status_callback("✅ SRT-файл гатовы.")
    return content, filename
def process_video(video_path, status_callback):
    """Extract the audio track from a video and run it through process_audio.

    The intermediate audio file created by ``extract_audio_from_video`` is
    deleted once processing finishes, whether it succeeded or not.

    Returns:
        Whatever ``process_audio`` returns for the extracted audio.
    """
    audio_path = extract_audio_from_video(video_path, status_callback)
    try:
        return process_audio(audio_path, status_callback)
    finally:
        # Best-effort cleanup: a leftover temp file must not mask the result
        # (or the original exception) of the processing step.
        try:
            os.remove(audio_path)
        except OSError:
            pass
def process_file(audio, video, status_callback):
    """Dispatch to audio or video processing, preferring the audio input.

    Returns the handler's result, or a "no file" message paired with
    ``None`` when both inputs are empty.
    """
    status_callback("🔄 Пачатак апрацоўкі...")
    # Audio is checked first, so it wins when both inputs are supplied.
    for source, handler in ((audio, process_audio), (video, process_video)):
        if source:
            return handler(source, status_callback)
    return "Няма файла для апрацоўкі.", None
# -----------------------
# Gradio UI
# -----------------------
with gr.Blocks() as demo:
    gr.Markdown(
        """
## Загрузіце аўдыёфайл або відэафайл. Субцітры будуць згенераваны разам з файлам субцітраў.
[Ёсць пытанні ці прапановы? Далучайцеся да беларускаймоўнай суполкі tuteishygpt ](https://t.me/SHibelChat)
**Хочаце каб сэрвіс працаваў? Налівайце каву! :** [Buy me a coffee](https://buymeacoffee.com/tuteishygpt)
**Агучце беларускую мову тут :** [Беларуская мадэль маўлення](https://huggingface.co/spaces/archivartaunik/Bextts)
"""
    )
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="🎙 Аўдыёфайл")
        video_input = gr.Video(label="🎥 Відэафайл")  # returns a str path
    btn = gr.Button("🚀 Апрацаваць")
    with gr.Row():
        transcript_output = gr.Textbox(label="📄 SRT-транскрыпцыя", lines=10)
        file_output = gr.File(label="⬇️ SRT-файл")
    status_output = gr.Textbox(label="🛠️ Статус", interactive=False)

    def wrapped_process(audio, video):
        """Run process_file in a worker thread and stream its status text.

        Assigning to ``status_output.value`` from a callback does not reach
        the browser, so status messages are instead pushed through a queue
        and yielded as a third output while the worker is running. The
        final yield carries the SRT text, the SRT file path and the last
        status message.
        """
        import queue

        updates = queue.Queue()
        done = object()  # sentinel marking the end of the worker's output
        outcome = {}

        def worker():
            try:
                outcome["result"] = process_file(audio, video, updates.put)
            except Exception as exc:
                # Re-raised in the generator so Gradio reports it as before.
                outcome["error"] = exc
            finally:
                updates.put(done)

        threading.Thread(target=worker, daemon=True).start()

        latest = ""
        while True:
            item = updates.get()
            if item is done:
                break
            latest = item
            # gr.update() with no arguments leaves a component unchanged.
            yield gr.update(), gr.update(), latest

        if "error" in outcome:
            raise outcome["error"]
        content, srt_path = outcome["result"]
        yield content, srt_path, latest

    btn.click(
        fn=wrapped_process,
        inputs=[audio_input, video_input],
        outputs=[transcript_output, file_output, status_output],
    )

# Generator handlers need the queue on older Gradio releases; on newer ones
# it is already enabled and this call is harmless.
demo.queue()
demo.launch()
|