File size: 6,325 Bytes
efaf26b
 
 
9f65ebc
efaf26b
9f65ebc
2630524
6d576a6
 
22e8728
efaf26b
6d576a6
efaf26b
6d576a6
aa19b68
2630524
6d576a6
2630524
df57e44
e4f57af
 
 
 
 
 
 
 
 
 
14becec
e4f57af
df57e44
 
 
 
 
 
e4f57af
efaf26b
 
6d576a6
efaf26b
6d576a6
 
 
 
 
 
 
 
 
22e8728
efaf26b
6d576a6
efaf26b
6d576a6
 
 
 
 
 
 
 
 
22e8728
6d576a6
 
efaf26b
6d576a6
 
 
efaf26b
e4f57af
df57e44
efaf26b
6d576a6
 
 
efaf26b
6d576a6
efaf26b
2630524
 
efaf26b
6d576a6
df57e44
6d576a6
efaf26b
22e8728
df57e44
efaf26b
e4f57af
 
 
 
 
 
efaf26b
df57e44
e4f57af
 
 
 
 
 
 
 
 
 
 
 
 
6d576a6
 
df57e44
 
 
6d576a6
df57e44
e4f57af
6d576a6
 
2630524
 
6d576a6
 
 
 
22e8728
6d576a6
 
 
efaf26b
6d576a6
 
e4f57af
6d576a6
e4f57af
6d576a6
e4f57af
efaf26b
df57e44
efaf26b
df57e44
6d576a6
9f65ebc
cd19dc0
 
 
14becec
cd19dc0
 
 
 
efaf26b
2630524
6d576a6
2630524
6d576a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efaf26b
e4f57af
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import os
import json
import mimetypes
import gradio as gr
import google.generativeai as genai
from pydub import AudioSegment
import uuid
import time
import threading

# -----------------------
# Configuration
# -----------------------

# The API key is read from the environment variable literally named "gemini".
# Without it nothing below can work, so importing the module fails right away.
GEMINI_API_KEY = os.environ.get("gemini")
if not GEMINI_API_KEY:
    raise ValueError("Не знойдзены API ключ для Gemini.")

genai.configure(api_key=GEMINI_API_KEY)

# Sampling settings passed to the model; the reply is requested as JSON so
# that it can be fed to json.loads by the transcription code.
generation_config = dict(
    temperature=0.35,
    top_p=0.95,
    top_k=64,
    max_output_tokens=65536,
    response_mime_type="application/json",
)

# The system instruction asks for a transcription and shows the expected
# keys ("start", "end", "text") of one segment.
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    generation_config=generation_config,
    system_instruction=(
        "transcribe\n"
        "format example:\n"
        '    "start": 858.37,\n'
        '    "end": 859.56,\n'
        '    "text": "Калі бяспечна, безумоўна."\n'
    ),
)

# -----------------------
# Progress animation (background thread)
# -----------------------

def progress_animation(status_callback, stop_event, interval=0.6):
    """Report an animated "transcription in progress" status until stopped.

    Cycles through four hourglass frames, passing one status string per
    frame to ``status_callback``. Intended to run in a background thread.

    Args:
        status_callback: Callable that accepts a single status string.
        stop_event: ``threading.Event``; the loop ends once it is set.
        interval: Seconds to pause between frames (defaults to 0.6).
    """
    frames = ("⏳", "⏳.", "⏳..", "⏳...")
    index = 0
    while not stop_event.is_set():
        status_callback(f"Транскрыпцыя ідзе {frames[index % len(frames)]}")
        index += 1
        # Event.wait() returns as soon as the event is set, so the thread
        # stops promptly instead of sleeping out the rest of the interval.
        stop_event.wait(interval)

# -----------------------
# Main functions
# -----------------------

def upload_to_gemini(path, status_callback):
    """Send the file at ``path`` to Gemini via ``genai.upload_file``.

    The MIME type is guessed from the file name with ``mimetypes`` and may be
    ``None`` when it cannot be determined. ``status_callback`` receives one
    message before the upload and one after it.

    Returns:
        The object returned by ``genai.upload_file``.
    """
    guessed_type = mimetypes.guess_type(path)[0]
    status_callback("📤 Загружаем файл у Gemini...")
    uploaded = genai.upload_file(path, mime_type=guessed_type)
    status_callback("✅ Файл загружаны.")
    return uploaded

def transcribe_audio(audio_path, status_callback):
    """Upload an audio file to Gemini and return the parsed transcription.

    While the model request is in flight, a background thread runs
    ``progress_animation`` to keep emitting status messages. The raw model
    reply is also written to ``last_response.json`` in the working directory.

    Args:
        audio_path: Path of the audio file to transcribe.
        status_callback: Callable that accepts a single status string; it is
            invoked both from this thread and from the animation thread.

    Returns:
        The value produced by ``json.loads`` on the model reply (callers
        check that it is a list of segments), or a ``str`` holding an error
        message when the reply is empty or any exception is raised.
    """
    try:
        status_callback("🔍 Падрыхтоўка транскрыпцыі...")
        file_obj = upload_to_gemini(audio_path, status_callback)

        stop_event = threading.Event()
        t = threading.Thread(target=progress_animation, args=(status_callback, stop_event))
        t.start()
        try:
            chat = model.start_chat(history=[])
            response = chat.send_message(file_obj)
        finally:
            # Stop the animation even when the request fails; otherwise the
            # non-daemon thread would keep looping and emitting statuses.
            stop_event.set()
            t.join()

        if not response.text:
            return "❌ Пусты адказ ад мадэлі."

        # Keep the raw reply on disk for debugging.
        with open("last_response.json", "w", encoding="utf-8") as f:
            f.write(response.text)

        status_callback("📥 Апрацоўка транскрыпцыі...")
        transcripts = json.loads(response.text)
        status_callback(f"✅ Гатова: {len(transcripts)} фрагментаў.")
        return transcripts
    except Exception as e:
        # Errors are reported as a string rather than raised; callers tell
        # success from failure by the type of the returned value.
        return f"Памылка: {e}"

def seconds_to_timestamp(sec: float) -> str:
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds before it is split into
    fields, so a fraction that rounds up carries into the seconds field
    (1.9996 becomes ``00:00:02,000``, never ``00:00:01,1000``). Negative
    inputs are clamped to zero.

    Args:
        sec: Offset in seconds.

    Returns:
        The timestamp string with zero-padded fields.
    """
    total_ms = max(0, int(round(sec * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

def transcripts_to_srt(transcripts, filename="subtitles.srt"):
    """Render transcript segments as SRT text and save them to ``filename``.

    Each segment is a mapping with ``"start"``, ``"end"`` and ``"text"``
    keys; segments are numbered from 1 and separated by a blank line. The
    whole text is built before the file is opened.

    Returns:
        ``(srt_text, filename)`` on success, or ``(error_message, None)``
        when any exception occurs while building or writing the text.
    """
    try:
        entries = []
        for number, segment in enumerate(transcripts, start=1):
            begin = seconds_to_timestamp(segment["start"])
            finish = seconds_to_timestamp(segment["end"])
            entries.append(f"{number}\n{begin} --> {finish}\n{segment['text']}\n")
        srt_text = "\n".join(entries)
        with open(filename, mode="w", encoding="utf-8") as srt_file:
            srt_file.write(srt_text)
    except Exception as err:
        return f"Памылка пры запісе SRT: {err}", None
    return srt_text, filename

def extract_audio_from_video(video_file, status_callback):
    """Export the audio of ``video_file`` to a new MP3 in the working directory.

    The media is loaded with ``AudioSegment.from_file`` and written to a
    file named ``extracted_<random hex>.mp3``. ``status_callback`` receives
    one message before the work starts and one after it finishes.

    Returns:
        The path of the MP3 file that was written.
    """
    status_callback("🎞 Вылучаем аўдыё з відэа...")
    track = AudioSegment.from_file(video_file)
    unique_id = uuid.uuid4().hex
    mp3_path = f"extracted_{unique_id}.mp3"
    track.export(mp3_path, format="mp3")
    status_callback("✅ Аўдыё вылучана.")
    return mp3_path

def process_audio(audio_path, status_callback):
    """Transcribe an audio file and convert the result to SRT.

    Args:
        audio_path: Path of the audio file to transcribe.
        status_callback: Callable that accepts a single status string.

    Returns:
        ``(srt_text, srt_filename)`` on success, or ``(message, None)`` when
        transcription or SRT generation fails.
    """
    result = transcribe_audio(audio_path, status_callback)
    if isinstance(result, str):
        # transcribe_audio reports failures as ready-made messages; pass
        # them through instead of prepending a second "Памылка:" prefix.
        return result, None
    if not isinstance(result, list):
        # Valid JSON but not a list of segments (e.g. a single object).
        return f"Памылка: {result}", None
    status_callback("📝 Канвертацыя ў SRT...")
    content, filename = transcripts_to_srt(result)
    if filename is not None:
        # Only announce success when the SRT file was actually written.
        status_callback("✅ SRT-файл гатовы.")
    return content, filename

def process_video(video_path, status_callback):
    """Extract the audio track of a video and run it through ``process_audio``.

    The intermediate audio file created by ``extract_audio_from_video`` is
    deleted once processing finishes, whether or not it succeeded.

    Args:
        video_path: Path of the video file.
        status_callback: Callable that accepts a single status string.

    Returns:
        The ``(text, filename)`` pair returned by ``process_audio``.
    """
    audio_path = extract_audio_from_video(video_path, status_callback)
    try:
        return process_audio(audio_path, status_callback)
    finally:
        # Best-effort cleanup: a failure to delete the temporary file must
        # not mask the processing result.
        try:
            os.remove(audio_path)
        except OSError:
            pass

def process_file(audio, video, status_callback):
    """Dispatch to audio or video processing, preferring ``audio``.

    Args:
        audio: Audio file path, or a falsy value when none was supplied.
        video: Video file path, or a falsy value when none was supplied.
        status_callback: Callable that accepts a single status string.

    Returns:
        The ``(text, filename)`` pair from the chosen processor, or a
        "no file" message paired with ``None`` when both inputs are falsy.
    """
    status_callback("🔄 Пачатак апрацоўкі...")
    if audio:
        outcome = process_audio(audio, status_callback)
    elif video:
        outcome = process_video(video, status_callback)
    else:
        outcome = ("Няма файла для апрацоўкі.", None)
    return outcome

# -----------------------
# Gradio UI
# -----------------------

# Build the page: inputs, a trigger button, and three outputs (SRT text,
# downloadable SRT file, and a status line).
with gr.Blocks() as demo:
    gr.Markdown(
        """
## Загрузіце аўдыёфайл або відэафайл. Субцітры будуць згенераваны разам з файлам субцітраў. 
[Ёсць пытанні ці прапановы? Далучайцеся да беларускаймоўнай суполкі tuteishygpt  ](https://t.me/SHibelChat)
**Хочаце каб сэрвіс працаваў? Налівайце каву! :** [Buy me a coffee](https://buymeacoffee.com/tuteishygpt)
**Агучце беларускую мову тут :** [Беларуская мадэль маўлення](https://huggingface.co/spaces/archivartaunik/Bextts)
        """
    )
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="🎙 Аўдыёфайл")
        video_input = gr.Video(label="🎥 Відэафайл")  # returns str
    btn = gr.Button("🚀 Апрацаваць")
    with gr.Row():
        transcript_output = gr.Textbox(label="📄 SRT-транскрыпцыя", lines=10)
        file_output = gr.File(label="⬇️ SRT-файл")
    status_output = gr.Textbox(label="🛠️ Статус", interactive=False)

    def wrapped_process(audio, video):
        """Run ``process_file`` and return its result plus the last status.

        Assigning to ``status_output.value`` inside a handler does not
        refresh the rendered textbox, so the most recent status message is
        captured here and returned as a third output instead.
        """
        last_status = ""

        def update_status(text):
            nonlocal last_status
            last_status = text

        content, filename = process_file(audio, video, update_status)
        return content, filename, last_status

    btn.click(
        fn=wrapped_process,
        inputs=[audio_input, video_input],
        outputs=[transcript_output, file_output, status_output],
    )

demo.launch()