File size: 11,768 Bytes
7370b88 4aaface 7370b88 4aaface 55adbbb 4aaface 7370b88 09b83c3 55adbbb 7370b88 09b83c3 7370b88 09b83c3 7370b88 55adbbb 7370b88 55adbbb 7370b88 55adbbb 4aaface 7370b88 4aaface 7370b88 55adbbb d6a3985 55adbbb d6a3985 7370b88 55adbbb 7370b88 55adbbb 7370b88 4aaface 55adbbb 7370b88 55adbbb 7370b88 55adbbb 7370b88 55adbbb 7370b88 55adbbb 7370b88 55adbbb 7370b88 55adbbb 7370b88 55adbbb 7370b88 55adbbb 4aaface 55adbbb 7370b88 4aaface 55adbbb 4aaface 7370b88 55adbbb d6a3985 55adbbb d6a3985 55adbbb d6a3985 55adbbb d6a3985 55adbbb d6a3985 55adbbb d6a3985 7370b88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 |
import gradio as gr
import shutil
import os
import subprocess
from src.mcp.tools import letter_counter
from src.mcp import video_tools
from src.llm import llm
from gradio.oauth import OAuthToken
def is_ffmpeg_installed():
    """Report whether an ``ffmpeg`` executable can be located on the PATH.

    Returns:
        ``True`` when ``shutil.which`` resolves ``ffmpeg``, ``False`` otherwise.
    """
    ffmpeg_location = shutil.which("ffmpeg")
    return ffmpeg_location is not None
def update_ffmpeg_status():
    """Build the FFmpeg status banner together with the detection flag.

    Returns:
        A ``(html, installed)`` pair: a colored HTML paragraph (green when
        FFmpeg was found, red when it was not) and the matching boolean.
    """
    installed = bool(is_ffmpeg_installed())
    banner = (
        "<p style='color:green; font-weight:bold;'>ffmpeg is installed</p>"
        if installed
        else "<p style='color:red; font-weight:bold;'>ffmpeg is not installed</p>"
    )
    return banner, installed
def handle_video_upload(file_obj, ffmpeg_installed):
    """
    Validates an uploaded MP4 file, saves it to a temporary directory,
    and controls the visibility of UI components based on validation success.

    Args:
        file_obj: The uploaded file as delivered by the file input. Either an
            object exposing the on-disk path through ``.name`` or a plain
            path (string / path-like).
        ffmpeg_installed: Flag telling whether FFmpeg was detected; uploads
            are rejected when it is falsy.

    Returns:
        A 4-tuple ``(preview_path, status_message, stored_path, tools_update)``.
        On any failure both paths are ``None`` and ``tools_update`` hides the
        video tools group; on success both paths point at the copy inside
        ``tmp`` and ``tools_update`` makes the group visible.
    """
    hide_tools = gr.update(visible=False)

    if not ffmpeg_installed:
        return None, "Cannot process video: FFmpeg is not installed.", None, hide_tools
    if not file_obj:
        return None, "Please upload a file.", None, hide_tools

    # Accept both upload wrappers that carry the path in `.name` and plain
    # path values, so the handler does not break on either form.
    source_path = os.fspath(getattr(file_obj, "name", file_obj))

    # Basic check for file extension
    if not source_path.lower().endswith('.mp4'):
        return None, "File must be an .mp4 file.", None, hide_tools

    # More robust check using ffprobe: a non-zero exit status (check=True) or
    # a missing ffprobe binary both count as a failed validation.
    try:
        subprocess.run(
            ["ffprobe", "-v", "error", "-show_format", "-show_streams", source_path],
            capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None, "Could not validate file. Ensure it's a valid MP4.", None, hide_tools

    # Only touch the filesystem once the upload is known to be acceptable.
    tmp_dir = "tmp"
    file_path = os.path.join(tmp_dir, os.path.basename(source_path))
    try:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(tmp_dir, exist_ok=True)
        shutil.copy(source_path, file_path)
    except OSError as exc:
        # Report storage problems through the status box like every other
        # failure path instead of surfacing an unhandled exception.
        return None, f"Could not save uploaded file: {exc}", None, hide_tools

    return file_path, "File uploaded successfully!", file_path, gr.update(visible=True)
def clear_previous_outputs():
    """Produce reset values for every video-related output before a new upload.

    Returns:
        A 10-tuple of positional reset values; each entry is annotated with
        the output it is intended for.
    """
    no_media = None
    blank_text = ""
    hidden_group = gr.update(visible=False)
    return (
        no_media,      # video_output
        blank_text,    # upload_status_text
        None,          # uploaded_video_path_state
        hidden_group,  # video_tools_group
        no_media,      # first_frame_img
        no_media,      # last_frame_img
        no_media,      # llm_media_output
        blank_text,    # video_command_status
        blank_text,    # llm_debug_output
        blank_text,    # llm_raw_response (a textbox, so it is cleared with an empty string)
    )
# --- Gradio UI ---
# Declares the whole interface (tabs, components, state) and wires every event
# listener. Component creation order and nesting define the rendered layout.
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm the container each component belongs to against the running app.
with gr.Blocks() as demo:
    # --- State Variables ---
    # Whether FFmpeg was detected; written by update_ffmpeg_status.
    ffmpeg_installed_state = gr.State(False)
    # Path of the currently uploaded video; written by the upload handlers.
    uploaded_video_path_state = gr.State("")
    with gr.Tabs():
        # --- Setup & Video Tab ---
        with gr.Tab("Setup & Video"):
            gr.Markdown("## System Status")
            with gr.Row():
                check_ffmpeg_btn = gr.Button("Check FFmpeg Status")
                status_text = gr.Markdown("Status will be checked on load.")
            gr.Markdown("---")
            gr.Markdown("## Video Tools")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Upload Video")
                    file_input = gr.File(label="Upload MP4", file_types=[".mp4"])
                    video_output = gr.Video(label="Preview", interactive=False, height="50vh")
                    upload_status_text = gr.Textbox(label="Upload Status", interactive=False)
                with gr.Column(scale=1):
                    # This empty column will take up the other 50% of the space
                    pass
        # --- Debug Tab ---
        with gr.Tab("Debug"):
            # Created hidden; handle_video_upload / clear_previous_outputs
            # toggle its visibility through the video_tools_group output.
            with gr.Column(scale=2, visible=False) as video_tools_group:
                gr.Markdown("### Manual Frame Extraction")
                with gr.Row():
                    get_first_frame_btn = gr.Button("Get First Frame")
                    get_last_frame_btn = gr.Button("Get Last Frame")
                with gr.Row():
                    first_frame_img = gr.Image(label="First Frame", type="filepath", interactive=False)
                    last_frame_img = gr.Image(label="Last Frame", type="filepath", interactive=False)
        # --- LLM Video Commands Tab ---
        with gr.Tab("LLM Video Commands"):
            gr.Markdown("## Test MCP Tool Calls with an LLM")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Video Command Center")
                    video_prompt_textbox = gr.Textbox(label="Video Command Prompt", placeholder="e.g., 'Get me the first frame of the video'")
                    process_prompt_btn = gr.Button("Run Command")
                    video_command_status = gr.Textbox(label="LLM Status", interactive=False)
                    with gr.Accordion("Debug Info", open=False):
                        llm_debug_output = gr.Textbox(label="Debug Info", lines=10, interactive=False)
                        llm_raw_response = gr.Textbox(label="Raw LLM Response", lines=10, interactive=True)
                with gr.Column(scale=1):
                    gr.Markdown("### LLM Result")
                    llm_media_output = gr.Image(label="Output", type="filepath", interactive=False)
        # --- LLM Configuration Tab ---
        with gr.Tab("LLM Configuration"):
            gr.Markdown("## Configure LLM")
            llm_provider = gr.Radio(
                ["Ollama", "Hugging Face"],
                value="Ollama",
                label="LLM Provider",
                info="Select the LLM provider to use."
            )
            # Shared state for model name across providers
            model_name_state = gr.State("")
            # Shown by default, matching the Radio's initial "Ollama" value.
            with gr.Group(visible=True) as ollama_config:
                gr.Markdown("### Ollama Configuration")
                with gr.Row():
                    ollama_url_textbox = gr.Textbox(
                        placeholder="http://localhost:11434",
                        label="Ollama Endpoint URL",
                        interactive=True,
                        elem_id="ollama_url"
                    )
                    check_endpoint_btn = gr.Button("Check Endpoint")
                ollama_status_textbox = gr.Textbox(label="Status", interactive=False)
                with gr.Row():
                    # Dropdown and button start hidden; they are outputs of the
                    # endpoint-check handlers wired below.
                    ollama_model_dropdown = gr.Dropdown(
                        label="Select a Model",
                        interactive=True,
                        visible=False,
                        elem_id="ollama_model_dropdown"
                    )
                    set_preferred_btn = gr.Button("Set as Preferred", visible=False)
                preferred_llm_display = gr.Textbox(label="Preferred Model Status", interactive=False)
            # Hidden until the provider Radio switches away from "Ollama".
            with gr.Group(visible=False) as hf_config:
                gr.Markdown("### Hugging Face Configuration")
                gr.Markdown(f"**Model ID:** `{llm.HF_MODEL_ID}` (hardcoded).")
                gr.Markdown("To use this provider, please log in with your Hugging Face account. API calls will use your personal token.")
                login_button = gr.LoginButton()
        # Hidden tab for exposing functions as MCP tools
        with gr.Tab("MCP Tools", visible=False):
            gr.Markdown("## Tools for MCP Server")
            # Input for video path for MCP tools
            mcp_video_path_input = gr.Textbox(label="Video Path for MCP")
            # For letter_counter
            lc_word_input = gr.Textbox(label="Word")
            lc_letter_input = gr.Textbox(label="Letter")
            lc_output = gr.Number(label="Count")
            lc_btn = gr.Button("Count Letters")
            # For video tools
            mcp_get_first_frame_btn = gr.Button("MCP Get First Frame")
            mcp_get_last_frame_btn = gr.Button("MCP Get Last Frame")
            mcp_convert_to_gif_btn = gr.Button("MCP Convert to GIF")

    # --- Event Handlers ---
    # --- LLM Provider Change ---
    def update_provider_visibility(provider):
        """Return visibility updates for (ollama_config, hf_config).

        The Ollama group is shown only when ``provider`` equals "Ollama";
        any other value shows the Hugging Face group instead.
        """
        if provider == "Ollama":
            return gr.update(visible=True), gr.update(visible=False)
        else: # Hugging Face
            return gr.update(visible=False), gr.update(visible=True)

    llm_provider.change(
        fn=update_provider_visibility,
        inputs=llm_provider,
        outputs=[ollama_config, hf_config],
        show_progress=False
    )

    # --- System Events ---
    # Page-load chain, run in this order: refresh the FFmpeg status, load the
    # saved LLM settings into the provider/URL/model/preferred-display
    # components, sync the provider groups' visibility with the loaded
    # provider, then run llm.check_on_load with the loaded URL and model name.
    demo.load(
        fn=update_ffmpeg_status,
        outputs=[status_text, ffmpeg_installed_state]
    ).then(
        fn=llm.load_settings,
        outputs=[
            llm_provider,
            ollama_url_textbox,
            model_name_state,
            preferred_llm_display,
        ]
    ).then(
        fn=update_provider_visibility,
        inputs=llm_provider,
        outputs=[ollama_config, hf_config],
        show_progress=False
    ).then(
        # Unlike the manual "Check Endpoint" click below, this step also
        # lists preferred_llm_display among its outputs.
        fn=llm.check_on_load,
        inputs=[ollama_url_textbox, model_name_state],
        outputs=[ollama_status_textbox, ollama_model_dropdown, set_preferred_btn, ollama_url_textbox, preferred_llm_display]
    )

    # --- FFmpeg Events ---
    check_ffmpeg_btn.click(
        fn=update_ffmpeg_status,
        outputs=[status_text, ffmpeg_installed_state]
    )

    # On upload, first reset every video-related output, then validate and
    # store the new file. The outputs list must stay in the same order as the
    # tuple returned by clear_previous_outputs.
    file_input.upload(
        fn=clear_previous_outputs,
        outputs=[
            video_output,
            upload_status_text,
            uploaded_video_path_state,
            video_tools_group,
            first_frame_img,
            last_frame_img,
            llm_media_output,
            video_command_status,
            llm_debug_output,
            llm_raw_response
        ]
    ).then(
        fn=handle_video_upload,
        inputs=[file_input, ffmpeg_installed_state],
        outputs=[video_output, upload_status_text, uploaded_video_path_state, video_tools_group]
    )

    # Manual frame extraction operates on the stored upload path.
    get_first_frame_btn.click(
        fn=video_tools.getFirstFrame,
        inputs=uploaded_video_path_state,
        outputs=first_frame_img
    )
    get_last_frame_btn.click(
        fn=video_tools.getLastFrame,
        inputs=uploaded_video_path_state,
        outputs=last_frame_img
    )

    # --- Ollama Events ---
    check_endpoint_btn.click(
        fn=llm.check_ollama_endpoint,
        inputs=[ollama_url_textbox, model_name_state],
        outputs=[ollama_status_textbox, ollama_model_dropdown, set_preferred_btn, ollama_url_textbox]
    )
    set_preferred_btn.click(
        fn=llm.set_preferred_model,
        inputs=[ollama_model_dropdown, ollama_url_textbox],
        outputs=[model_name_state, preferred_llm_display]
    )

    # --- LLM Command Events ---
    # Sends the prompt together with the provider choice, the uploaded video
    # path, and the Ollama URL / model name to llm.dispatch_video_prompt.
    process_prompt_btn.click(
        fn=llm.dispatch_video_prompt,
        inputs=[
            llm_provider,
            video_prompt_textbox,
            uploaded_video_path_state,
            ollama_url_textbox,
            model_name_state,
        ],
        outputs=[
            llm_media_output,
            llm_debug_output,
            video_command_status,
            llm_raw_response
        ]
    )

    # --- MCP Tool Events (Hidden) ---
    # Each listener is given an explicit api_name.
    lc_btn.click(
        fn=letter_counter,
        inputs=[lc_word_input, lc_letter_input],
        outputs=lc_output,
        api_name="letter_counter"
    )
    # NOTE(review): the three video listeners below declare no `outputs`,
    # whereas getFirstFrame/getLastFrame are wired to image outputs in the
    # Debug tab handlers above. Confirm whether these endpoints are expected
    # to hand their return value back to the caller.
    mcp_get_first_frame_btn.click(
        fn=video_tools.getFirstFrame,
        inputs=[mcp_video_path_input],
        api_name="getFirstFrame"
    )
    mcp_get_last_frame_btn.click(
        fn=video_tools.getLastFrame,
        inputs=[mcp_video_path_input],
        api_name="getLastFrame"
    )
    mcp_convert_to_gif_btn.click(
        fn=video_tools.convert_mp4_to_gif,
        inputs=[mcp_video_path_input],
        api_name="convert_mp4_to_gif"
    )
# Launch the app only when this file is executed as a script, so importing the
# module does not start a server. The app is started with mcp_server=True.
if __name__ == "__main__":
    demo.launch(mcp_server=True)
|