File size: 11,768 Bytes
7370b88
 
 
 
 
 
 
4aaface
7370b88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4aaface
55adbbb
 
4aaface
7370b88
 
 
 
 
 
 
 
 
09b83c3
55adbbb
7370b88
 
 
 
 
 
 
 
 
 
 
09b83c3
7370b88
09b83c3
 
 
 
 
 
 
 
 
 
 
 
 
7370b88
55adbbb
 
7370b88
 
 
55adbbb
 
 
7370b88
55adbbb
 
4aaface
7370b88
 
4aaface
 
7370b88
55adbbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6a3985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55adbbb
d6a3985
7370b88
 
55adbbb
 
 
 
 
 
 
 
 
 
 
 
 
 
7370b88
55adbbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7370b88
 
 
 
 
 
 
 
 
 
 
 
 
 
4aaface
55adbbb
 
 
7370b88
 
 
 
 
 
 
 
 
 
55adbbb
7370b88
 
 
 
 
55adbbb
7370b88
 
55adbbb
 
7370b88
55adbbb
 
7370b88
 
55adbbb
7370b88
55adbbb
 
7370b88
55adbbb
 
 
 
7370b88
55adbbb
4aaface
55adbbb
 
 
7370b88
 
4aaface
 
55adbbb
4aaface
7370b88
 
 
55adbbb
d6a3985
 
 
 
 
 
55adbbb
d6a3985
 
55adbbb
d6a3985
 
 
 
 
55adbbb
d6a3985
 
55adbbb
 
d6a3985
55adbbb
d6a3985
 
 
7370b88
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
import gradio as gr
import shutil
import os
import subprocess
from src.mcp.tools import letter_counter
from src.mcp import video_tools
from src.llm import llm
from gradio.oauth import OAuthToken

def is_ffmpeg_installed():
    """Report whether an ``ffmpeg`` executable can be resolved via the PATH.

    Returns:
        ``True`` when ``shutil.which`` finds ``ffmpeg``, ``False`` otherwise.
    """
    ffmpeg_location = shutil.which("ffmpeg")
    if ffmpeg_location is None:
        return False
    return True

def update_ffmpeg_status():
    """Build the FFmpeg status banner together with the detection flag.

    Returns:
        A ``(html, installed)`` pair: a green "installed" paragraph with
        ``True`` when FFmpeg is found, otherwise a red "not installed"
        paragraph with ``False``.
    """
    if not is_ffmpeg_installed():
        missing_banner = "<p style='color:red; font-weight:bold;'>ffmpeg is not installed</p>"
        return missing_banner, False

    present_banner = "<p style='color:green; font-weight:bold;'>ffmpeg is installed</p>"
    return present_banner, True

def handle_video_upload(file_obj, ffmpeg_installed):
    """
    Validates an uploaded MP4 file, copies it into the local ``tmp`` directory,
    and controls the visibility of UI components based on validation success.

    Args:
        file_obj: The upload as delivered by the file input. Either an object
            exposing its on-disk path as ``.name`` or a plain path string;
            falsy when nothing was uploaded.
        ffmpeg_installed: Whether FFmpeg was detected. Processing is refused
            when this is falsy.

    Returns:
        A 4-tuple ``(preview_path, status_message, stored_path, tools_update)``.
        On success both paths are the copied file inside ``tmp`` and
        ``tools_update`` makes the tools group visible. On any failure both
        paths are ``None``, the message explains the problem, and
        ``tools_update`` hides the tools group.
    """

    def _reject(message):
        # Every failure path shares the same shape: no video, hidden tools.
        return None, message, None, gr.update(visible=False)

    if not ffmpeg_installed:
        return _reject("Cannot process video: FFmpeg is not installed.")
    if not file_obj:
        return _reject("Please upload a file.")

    # Accept both wrapper objects (path stored in ``.name``) and plain path
    # strings, so the handler does not depend on how the upload is delivered.
    source_path = getattr(file_obj, "name", file_obj)

    tmp_dir = "tmp"
    try:
        # exist_ok avoids the check-then-create race between concurrent uploads.
        os.makedirs(tmp_dir, exist_ok=True)
    except OSError as exc:
        return _reject(f"Could not create temporary directory: {exc}")

    # Basic check for file extension
    if not source_path.lower().endswith('.mp4'):
        return _reject("File must be an .mp4 file.")

    # More robust check using ffprobe: a non-zero exit status (or a missing
    # ffprobe binary) means the file cannot be validated.
    try:
        subprocess.run(
            ["ffprobe", "-v", "error", "-show_format", "-show_streams", source_path],
            capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return _reject("Could not validate file. Ensure it's a valid MP4.")

    file_path = os.path.join(tmp_dir, os.path.basename(source_path))
    try:
        shutil.copy(source_path, file_path)
    except shutil.SameFileError:
        # The upload already lives at the destination; nothing to copy.
        pass
    except OSError as exc:
        # Report disk/permission problems in the status box instead of
        # letting the exception escape the UI handler.
        return _reject(f"Could not save uploaded file: {exc}")

    return file_path, "File uploaded successfully!", file_path, gr.update(visible=True)

def clear_previous_outputs():
    """Produce reset values for every video-related output before a new upload.

    Returns:
        A 10-tuple whose positions line up with the outputs the upload
        handler wires this function to (see the per-item comments below).
    """
    hide_tools_group = gr.update(visible=False)
    reset_values = [
        None,              # video_output
        "",                # upload_status_text
        None,              # uploaded_video_path_state
        hide_tools_group,  # video_tools_group
        None,              # first_frame_img
        None,              # last_frame_img
        None,              # llm_media_output
        "",                # video_command_status
        "",                # llm_debug_output
        "",                # llm_raw_response (a textbox, so cleared with an empty string)
    ]
    return tuple(reset_values)

# --- Gradio UI ---
with gr.Blocks() as demo:
    # The whole application UI is built inside this single Blocks context:
    # components are declared first (grouped into tabs), then the event
    # handlers that connect them are registered further down.

    # --- State Variables ---
    # ffmpeg_installed_state is written by update_ffmpeg_status and read by
    # handle_video_upload; uploaded_video_path_state is written by the upload
    # chain and read by the frame-extraction and LLM command handlers.
    ffmpeg_installed_state = gr.State(False)
    uploaded_video_path_state = gr.State("")

    with gr.Tabs():
        # --- Setup & Video Tab ---
        with gr.Tab("Setup & Video"):
            gr.Markdown("## System Status")
            with gr.Row():
                check_ffmpeg_btn = gr.Button("Check FFmpeg Status")
                # Placeholder text; replaced by the demo.load chain below.
                status_text = gr.Markdown("Status will be checked on load.")

            gr.Markdown("---")
            gr.Markdown("## Video Tools")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Upload Video")
                    file_input = gr.File(label="Upload MP4", file_types=[".mp4"])
                    video_output = gr.Video(label="Preview", interactive=False, height="50vh")
                    upload_status_text = gr.Textbox(label="Upload Status", interactive=False)
                with gr.Column(scale=1):
                    # This empty column will take up the other 50% of the space
                    pass

        # --- Debug Tab ---
        with gr.Tab("Debug"):
            # Starts hidden; the upload handlers toggle its visibility through
            # their video_tools_group output.
            with gr.Column(scale=2, visible=False) as video_tools_group:
                gr.Markdown("### Manual Frame Extraction")
                with gr.Row():
                    get_first_frame_btn = gr.Button("Get First Frame")
                    get_last_frame_btn = gr.Button("Get Last Frame")
                with gr.Row():
                    first_frame_img = gr.Image(label="First Frame", type="filepath", interactive=False)
                    last_frame_img = gr.Image(label="Last Frame", type="filepath", interactive=False)

        # --- LLM Video Commands Tab ---
        with gr.Tab("LLM Video Commands"):
            gr.Markdown("## Test MCP Tool Calls with an LLM")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Video Command Center")
                    video_prompt_textbox = gr.Textbox(label="Video Command Prompt", placeholder="e.g., 'Get me the first frame of the video'")
                    process_prompt_btn = gr.Button("Run Command")
                    video_command_status = gr.Textbox(label="LLM Status", interactive=False)

                    with gr.Accordion("Debug Info", open=False):
                        llm_debug_output = gr.Textbox(label="Debug Info", lines=10, interactive=False)
                        # Unlike the other result boxes, this one is declared interactive.
                        llm_raw_response = gr.Textbox(label="Raw LLM Response", lines=10, interactive=True)

                with gr.Column(scale=1):
                    gr.Markdown("### LLM Result")
                    llm_media_output = gr.Image(label="Output", type="filepath", interactive=False)

        # --- LLM Configuration Tab ---
        with gr.Tab("LLM Configuration"):
            gr.Markdown("## Configure LLM")
            llm_provider = gr.Radio(
                ["Ollama", "Hugging Face"], 
                value="Ollama", 
                label="LLM Provider",
                info="Select the LLM provider to use."
            )

            # Shared state for model name across providers
            model_name_state = gr.State("")

            # Ollama settings group; visible by default to match the Radio's
            # initial value of "Ollama".
            with gr.Group(visible=True) as ollama_config:
                gr.Markdown("### Ollama Configuration")
                with gr.Row():
                    ollama_url_textbox = gr.Textbox(
                        placeholder="http://localhost:11434",
                        label="Ollama Endpoint URL",
                        interactive=True,
                        elem_id="ollama_url"
                    )
                    check_endpoint_btn = gr.Button("Check Endpoint")
                ollama_status_textbox = gr.Textbox(label="Status", interactive=False)

                with gr.Row():
                    # Both start hidden; they are outputs of the endpoint-check
                    # handlers registered below.
                    ollama_model_dropdown = gr.Dropdown(
                        label="Select a Model",
                        interactive=True,
                        visible=False,
                        elem_id="ollama_model_dropdown"
                    )
                    set_preferred_btn = gr.Button("Set as Preferred", visible=False)

                preferred_llm_display = gr.Textbox(label="Preferred Model Status", interactive=False)

            # Hugging Face settings group; hidden until the provider Radio
            # switches away from "Ollama".
            with gr.Group(visible=False) as hf_config:
                gr.Markdown("### Hugging Face Configuration")
                gr.Markdown(f"**Model ID:** `{llm.HF_MODEL_ID}` (hardcoded).")
                gr.Markdown("To use this provider, please log in with your Hugging Face account. API calls will use your personal token.")
                login_button = gr.LoginButton()

        # Hidden tab for exposing functions as MCP tools
        with gr.Tab("MCP Tools", visible=False):
            gr.Markdown("## Tools for MCP Server")

            # Input for video path for MCP tools
            mcp_video_path_input = gr.Textbox(label="Video Path for MCP")

            # For letter_counter
            lc_word_input = gr.Textbox(label="Word")
            lc_letter_input = gr.Textbox(label="Letter")
            lc_output = gr.Number(label="Count")
            lc_btn = gr.Button("Count Letters")

            # For video tools
            mcp_get_first_frame_btn = gr.Button("MCP Get First Frame")
            mcp_get_last_frame_btn = gr.Button("MCP Get Last Frame")
            mcp_convert_to_gif_btn = gr.Button("MCP Convert to GIF")

    # --- Event Handlers ---

    # --- LLM Provider Change ---
    def update_provider_visibility(provider):
        """Return visibility updates for (ollama_config, hf_config).

        The Ollama group is shown only when ``provider`` equals "Ollama";
        any other value shows the Hugging Face group instead.
        """
        if provider == "Ollama":
            return gr.update(visible=True), gr.update(visible=False)
        else: # Hugging Face
            return gr.update(visible=False), gr.update(visible=True)

    llm_provider.change(
        fn=update_provider_visibility,
        inputs=llm_provider,
        outputs=[ollama_config, hf_config],
        show_progress=False
    )

    # --- System Events ---
    # On page load, run four steps in sequence:
    #   1. refresh the FFmpeg status text and flag,
    #   2. call llm.load_settings to populate the provider, Ollama URL,
    #      model-name state and preferred-model display,
    #   3. sync the provider groups' visibility with the loaded provider,
    #   4. call llm.check_on_load with the loaded URL and model name.
    demo.load(
        fn=update_ffmpeg_status,
        outputs=[status_text, ffmpeg_installed_state]
    ).then(
        fn=llm.load_settings,
        outputs=[
            llm_provider,
            ollama_url_textbox,
            model_name_state,
            preferred_llm_display,
        ]
    ).then(
        fn=update_provider_visibility,
        inputs=llm_provider,
        outputs=[ollama_config, hf_config],
        show_progress=False
    ).then(
        fn=llm.check_on_load,
        inputs=[ollama_url_textbox, model_name_state],
        outputs=[ollama_status_textbox, ollama_model_dropdown, set_preferred_btn, ollama_url_textbox, preferred_llm_display]
    )

    # --- FFmpeg Events ---
    check_ffmpeg_btn.click(
        fn=update_ffmpeg_status,
        outputs=[status_text, ffmpeg_installed_state]
    )

    # A new upload first resets every video-related output, then validates
    # and stores the file. The order of this outputs list must match the
    # tuple returned by clear_previous_outputs.
    file_input.upload(
        fn=clear_previous_outputs,
        outputs=[
            video_output,
            upload_status_text,
            uploaded_video_path_state,
            video_tools_group,
            first_frame_img,
            last_frame_img,
            llm_media_output,
            video_command_status,
            llm_debug_output,
            llm_raw_response
        ]
    ).then(
        fn=handle_video_upload,
        inputs=[file_input, ffmpeg_installed_state],
        outputs=[video_output, upload_status_text, uploaded_video_path_state, video_tools_group]
    )

    # Manual frame extraction operates on the stored upload path.
    get_first_frame_btn.click(
        fn=video_tools.getFirstFrame,
        inputs=uploaded_video_path_state,
        outputs=first_frame_img
    )

    get_last_frame_btn.click(
        fn=video_tools.getLastFrame,
        inputs=uploaded_video_path_state,
        outputs=last_frame_img
    )

    # --- Ollama Events ---
    # NOTE(review): unlike the check_on_load step above, preferred_llm_display
    # is not among these outputs, so llm.check_ollama_endpoint presumably
    # returns four values rather than five — confirm against src.llm.
    check_endpoint_btn.click(
        fn=llm.check_ollama_endpoint,
        inputs=[ollama_url_textbox, model_name_state],
        outputs=[ollama_status_textbox, ollama_model_dropdown, set_preferred_btn, ollama_url_textbox]
    )

    set_preferred_btn.click(
        fn=llm.set_preferred_model,
        inputs=[ollama_model_dropdown, ollama_url_textbox],
        outputs=[model_name_state, preferred_llm_display]
    )

    # --- LLM Command Events ---
    # Sends the prompt plus the current provider, video path, Ollama URL and
    # model name to llm.dispatch_video_prompt and displays its four results.
    process_prompt_btn.click(
        fn=llm.dispatch_video_prompt,
        inputs=[
            llm_provider,
            video_prompt_textbox, 
            uploaded_video_path_state,
            ollama_url_textbox,
            model_name_state,
        ],
        outputs=[
            llm_media_output,
            llm_debug_output,
            video_command_status,
            llm_raw_response
        ]
    )

    # --- MCP Tool Events (Hidden) ---
    # Each handler is registered under an explicit api_name.
    lc_btn.click(
        fn=letter_counter,
        inputs=[lc_word_input, lc_letter_input],
        outputs=lc_output,
        api_name="letter_counter"
    )

    # NOTE(review): the three video handlers below declare no outputs, so
    # whatever these functions return is not bound to any component here —
    # confirm this is how results are meant to reach API/MCP callers.
    mcp_get_first_frame_btn.click(
        fn=video_tools.getFirstFrame,
        inputs=[mcp_video_path_input],
        api_name="getFirstFrame"
    )

    mcp_get_last_frame_btn.click(
        fn=video_tools.getLastFrame,
        inputs=[mcp_video_path_input],
        api_name="getLastFrame"
    )

    mcp_convert_to_gif_btn.click(
        fn=video_tools.convert_mp4_to_gif,
        inputs=[mcp_video_path_input],
        api_name="convert_mp4_to_gif"
    )

if __name__ == "__main__":
    # Start the app only when this file is executed directly, not on import.
    # mcp_server=True is forwarded to launch(); presumably this is what
    # exposes the api_name'd handlers as MCP tools — confirm against the
    # Gradio documentation.
    demo.launch(mcp_server=True)