TerminalCalm's picture
feat: finished readme
09b83c3 verified
import gradio as gr
import shutil
import os
import subprocess
from src.mcp.tools import letter_counter
from src.mcp import video_tools
from src.llm import llm
from gradio.oauth import OAuthToken
def is_ffmpeg_installed():
    """Report whether an ``ffmpeg`` executable can be resolved via the system PATH."""
    ffmpeg_location = shutil.which("ffmpeg")
    return ffmpeg_location is not None
def update_ffmpeg_status():
    """Build the FFmpeg status banner.

    Returns:
        A ``(html, installed)`` pair: a colored HTML paragraph describing the
        lookup result (green when found, red when missing) and the matching
        boolean flag.
    """
    # Guard clause: report the failure case first.
    if not is_ffmpeg_installed():
        return "<p style='color:red; font-weight:bold;'>ffmpeg is not installed</p>", False
    return "<p style='color:green; font-weight:bold;'>ffmpeg is installed</p>", True
def _upload_rejected(message):
    """Return the failure result of the upload handler: no preview, *message*, no stored path, tools hidden."""
    return None, message, None, gr.update(visible=False)


def handle_video_upload(file_obj, ffmpeg_installed):
    """
    Validate an uploaded MP4 file and copy it into the local ``tmp`` directory.

    Args:
        file_obj: The uploaded file. Either an object exposing the file path
            as ``.name`` or the path itself as a plain string.
        ffmpeg_installed: Flag telling whether FFmpeg was found; nothing is
            processed when it is falsy.

    Returns:
        A 4-tuple ``(preview_path, status_message, stored_path, tools_update)``.
        On success both paths point at the copy inside ``tmp`` and
        ``tools_update`` makes the tools group visible. On any failure the
        paths are ``None`` and ``tools_update`` hides the group.
    """
    if not ffmpeg_installed:
        return _upload_rejected("Cannot process video: FFmpeg is not installed.")

    if not file_obj:
        return _upload_rejected("Please upload a file.")

    # Accept a plain path string as well as an object carrying the path in
    # ``.name`` so the handler does not depend on the wrapper type.
    source_path = getattr(file_obj, "name", file_obj)

    tmp_dir = "tmp"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(tmp_dir, exist_ok=True)

    # Basic check for file extension
    if not source_path.lower().endswith(".mp4"):
        return _upload_rejected("File must be an .mp4 file.")

    # More robust check using ffprobe. Only the exit status matters, so the
    # output is discarded rather than captured and decoded as text (decoding
    # unusual metadata could raise an unrelated UnicodeDecodeError).
    try:
        subprocess.run(
            ["ffprobe", "-v", "error", "-show_format", "-show_streams", source_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Non-zero exit (unreadable media) or ffprobe itself is missing.
        return _upload_rejected("Could not validate file. Ensure it's a valid MP4.")

    file_path = os.path.join(tmp_dir, os.path.basename(source_path))
    shutil.copy(source_path, file_path)

    return file_path, "File uploaded successfully!", file_path, gr.update(visible=True)
def clear_previous_outputs():
    """Produce reset values for every video-related output before a new upload.

    The result is positional: one entry per component, in the order noted
    beside each value below.
    """
    no_media = None   # value used to empty media/state components
    blank_text = ""   # value used to empty textboxes
    hide_tools = gr.update(visible=False)

    return (
        no_media,    # video_output
        blank_text,  # upload_status_text
        no_media,    # uploaded_video_path_state
        hide_tools,  # video_tools_group
        no_media,    # first_frame_img
        no_media,    # last_frame_img
        no_media,    # llm_media_output
        blank_text,  # video_command_status
        blank_text,  # llm_debug_output
        blank_text,  # llm_raw_response (now a textbox, clear with empty string)
    )
# --- Gradio UI ---
# Builds the whole UI and wires its events. Event listeners must be registered
# while the Blocks context is open, so layout and wiring live in one `with`.
with gr.Blocks() as demo:
    # --- State Variables ---
    ffmpeg_installed_state = gr.State(False)
    uploaded_video_path_state = gr.State("")

    with gr.Tabs():
        # --- Setup & Video Tab ---
        with gr.Tab("Setup & Video"):
            gr.Markdown("## System Status")
            with gr.Row():
                check_ffmpeg_btn = gr.Button("Check FFmpeg Status")
                status_text = gr.Markdown("Status will be checked on load.")

            gr.Markdown("---")

            gr.Markdown("## Video Tools")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Upload Video")
                    file_input = gr.File(label="Upload MP4", file_types=[".mp4"])
                    video_output = gr.Video(label="Preview", interactive=False, height="50vh")
                    upload_status_text = gr.Textbox(label="Upload Status", interactive=False)
                with gr.Column(scale=1):
                    # This empty column will take up the other 50% of the space
                    pass

        with gr.Tab("Debug"):
            # Hidden until an upload validates; toggled by handle_video_upload.
            with gr.Column(scale=2, visible=False) as video_tools_group:
                gr.Markdown("### Manual Frame Extraction")
                with gr.Row():
                    get_first_frame_btn = gr.Button("Get First Frame")
                    get_last_frame_btn = gr.Button("Get Last Frame")
                with gr.Row():
                    first_frame_img = gr.Image(label="First Frame", type="filepath", interactive=False)
                    last_frame_img = gr.Image(label="Last Frame", type="filepath", interactive=False)

        with gr.Tab("LLM Video Commands"):
            gr.Markdown("## Test MCP Tool Calls with an LLM")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Video Command Center")
                    video_prompt_textbox = gr.Textbox(label="Video Command Prompt", placeholder="e.g., 'Get me the first frame of the video'")
                    process_prompt_btn = gr.Button("Run Command")
                    video_command_status = gr.Textbox(label="LLM Status", interactive=False)
                    with gr.Accordion("Debug Info", open=False):
                        llm_debug_output = gr.Textbox(label="Debug Info", lines=10, interactive=False)
                        llm_raw_response = gr.Textbox(label="Raw LLM Response", lines=10, interactive=True)
                with gr.Column(scale=1):
                    gr.Markdown("### LLM Result")
                    llm_media_output = gr.Image(label="Output", type="filepath", interactive=False)

        # --- LLM Configuration Tab ---
        with gr.Tab("LLM Configuration"):
            gr.Markdown("## Configure LLM")
            llm_provider = gr.Radio(
                ["Ollama", "Hugging Face"],
                value="Ollama",
                label="LLM Provider",
                info="Select the LLM provider to use."
            )

            # Shared state for model name across providers
            model_name_state = gr.State("")

            with gr.Group(visible=True) as ollama_config:
                gr.Markdown("### Ollama Configuration")
                with gr.Row():
                    ollama_url_textbox = gr.Textbox(
                        placeholder="http://localhost:11434",
                        label="Ollama Endpoint URL",
                        interactive=True,
                        elem_id="ollama_url"
                    )
                    check_endpoint_btn = gr.Button("Check Endpoint")

                ollama_status_textbox = gr.Textbox(label="Status", interactive=False)

                with gr.Row():
                    ollama_model_dropdown = gr.Dropdown(
                        label="Select a Model",
                        interactive=True,
                        visible=False,
                        elem_id="ollama_model_dropdown"
                    )
                    set_preferred_btn = gr.Button("Set as Preferred", visible=False)

                preferred_llm_display = gr.Textbox(label="Preferred Model Status", interactive=False)

            with gr.Group(visible=False) as hf_config:
                gr.Markdown("### Hugging Face Configuration")
                gr.Markdown(f"**Model ID:** `{llm.HF_MODEL_ID}` (hardcoded).")
                gr.Markdown("To use this provider, please log in with your Hugging Face account. API calls will use your personal token.")
                login_button = gr.LoginButton()

        # Hidden tab for exposing functions as MCP tools
        with gr.Tab("MCP Tools", visible=False):
            gr.Markdown("## Tools for MCP Server")
            # Input for video path for MCP tools
            mcp_video_path_input = gr.Textbox(label="Video Path for MCP")

            # For letter_counter
            lc_word_input = gr.Textbox(label="Word")
            lc_letter_input = gr.Textbox(label="Letter")
            lc_output = gr.Number(label="Count")
            lc_btn = gr.Button("Count Letters")

            # For video tools
            mcp_get_first_frame_btn = gr.Button("MCP Get First Frame")
            mcp_get_last_frame_btn = gr.Button("MCP Get Last Frame")
            mcp_convert_to_gif_btn = gr.Button("MCP Convert to GIF")

            # Result holders for the video tools. An event registered without
            # `outputs` has its return value dropped, so API/MCP callers would
            # receive nothing back from these endpoints.
            # The frame tools feed filepath images in the Debug tab, so the
            # same component type is used here.
            mcp_first_frame_output = gr.Image(label="MCP First Frame", type="filepath", interactive=False)
            mcp_last_frame_output = gr.Image(label="MCP Last Frame", type="filepath", interactive=False)
            # NOTE(review): convert_mp4_to_gif's return type is not visible
            # from this file; it is surfaced as text (presumably the GIF path).
            # Switch to a media component once confirmed.
            mcp_gif_output = gr.Textbox(label="MCP GIF Result", interactive=False)

    # --- Event Handlers ---

    # --- LLM Provider Change ---
    def update_provider_visibility(provider):
        """Show the config group of the selected provider and hide the other.

        Returns a pair of updates for ``(ollama_config, hf_config)``. Any
        value other than "Ollama" is treated as Hugging Face.
        """
        if provider == "Ollama":
            return gr.update(visible=True), gr.update(visible=False)
        else:  # Hugging Face
            return gr.update(visible=False), gr.update(visible=True)

    llm_provider.change(
        fn=update_provider_visibility,
        inputs=llm_provider,
        outputs=[ollama_config, hf_config],
        show_progress=False
    )

    # --- System Events ---
    # On page load: FFmpeg check -> saved LLM settings -> provider visibility
    # -> endpoint check. Each step consumes values set by the previous one.
    demo.load(
        fn=update_ffmpeg_status,
        outputs=[status_text, ffmpeg_installed_state]
    ).then(
        fn=llm.load_settings,
        outputs=[
            llm_provider,
            ollama_url_textbox,
            model_name_state,
            preferred_llm_display,
        ]
    ).then(
        fn=update_provider_visibility,
        inputs=llm_provider,
        outputs=[ollama_config, hf_config],
        show_progress=False
    ).then(
        fn=llm.check_on_load,
        inputs=[ollama_url_textbox, model_name_state],
        outputs=[ollama_status_textbox, ollama_model_dropdown, set_preferred_btn, ollama_url_textbox, preferred_llm_display]
    )

    # --- FFmpeg Events ---
    check_ffmpeg_btn.click(
        fn=update_ffmpeg_status,
        outputs=[status_text, ffmpeg_installed_state]
    )

    # Reset every video-related output first, then validate the new upload.
    # The outputs order must match the tuple from clear_previous_outputs.
    file_input.upload(
        fn=clear_previous_outputs,
        outputs=[
            video_output,
            upload_status_text,
            uploaded_video_path_state,
            video_tools_group,
            first_frame_img,
            last_frame_img,
            llm_media_output,
            video_command_status,
            llm_debug_output,
            llm_raw_response
        ]
    ).then(
        fn=handle_video_upload,
        inputs=[file_input, ffmpeg_installed_state],
        outputs=[video_output, upload_status_text, uploaded_video_path_state, video_tools_group]
    )

    get_first_frame_btn.click(
        fn=video_tools.getFirstFrame,
        inputs=uploaded_video_path_state,
        outputs=first_frame_img
    )

    get_last_frame_btn.click(
        fn=video_tools.getLastFrame,
        inputs=uploaded_video_path_state,
        outputs=last_frame_img
    )

    # --- Ollama Events ---
    check_endpoint_btn.click(
        fn=llm.check_ollama_endpoint,
        inputs=[ollama_url_textbox, model_name_state],
        outputs=[ollama_status_textbox, ollama_model_dropdown, set_preferred_btn, ollama_url_textbox]
    )

    set_preferred_btn.click(
        fn=llm.set_preferred_model,
        inputs=[ollama_model_dropdown, ollama_url_textbox],
        outputs=[model_name_state, preferred_llm_display]
    )

    # --- LLM Command Events ---
    process_prompt_btn.click(
        fn=llm.dispatch_video_prompt,
        inputs=[
            llm_provider,
            video_prompt_textbox,
            uploaded_video_path_state,
            ollama_url_textbox,
            model_name_state,
        ],
        outputs=[
            llm_media_output,
            llm_debug_output,
            video_command_status,
            llm_raw_response
        ]
    )

    # --- MCP Tool Events (Hidden) ---
    lc_btn.click(
        fn=letter_counter,
        inputs=[lc_word_input, lc_letter_input],
        outputs=lc_output,
        api_name="letter_counter"
    )

    mcp_get_first_frame_btn.click(
        fn=video_tools.getFirstFrame,
        inputs=[mcp_video_path_input],
        outputs=mcp_first_frame_output,
        api_name="getFirstFrame"
    )

    mcp_get_last_frame_btn.click(
        fn=video_tools.getLastFrame,
        inputs=[mcp_video_path_input],
        outputs=mcp_last_frame_output,
        api_name="getLastFrame"
    )

    mcp_convert_to_gif_btn.click(
        fn=video_tools.convert_mp4_to_gif,
        inputs=[mcp_video_path_input],
        outputs=mcp_gif_output,
        api_name="convert_mp4_to_gif"
    )
if __name__ == "__main__":
    # Start the app only when this file is run as a script, not on import.
    # mcp_server=True asks Gradio to also serve the app's API endpoints as
    # MCP tools (per Gradio's MCP server documentation).
    demo.launch(mcp_server=True)