AverageAiLiker committed
Commit 838951c (verified)
Parent: b4dbf40

Deploy Gradio app with multiple files

Files changed (5):
  1. app.py +239 -0
  2. config.py +26 -0
  3. models.py +68 -0
  4. requirements.txt +9 -0
  5. utils.py +146 -0
app.py ADDED
@@ -0,0 +1,239 @@
+ import gradio as gr
+ import spaces
+ import torch
+ from diffusers import DiffusionPipeline
+ import numpy as np
+ from PIL import Image
+ import os
+ import tempfile
+ from typing import Optional, Tuple
+ import time
+
+ from config import MODEL_ID, DEFAULT_HEIGHT, DEFAULT_WIDTH, DEFAULT_NUM_FRAMES, DEFAULT_NUM_INFERENCE_STEPS
+ from utils import create_video_from_frames, save_video_temp, cleanup_temp_files
+ from models import load_pipeline
+
+ # Global pipeline variable
+ pipeline = None
+
+ @spaces.GPU(duration=300)
+ def initialize_model():
+     """Initialize the Open-Sora-v2 pipeline"""
+     global pipeline
+     if pipeline is None:
+         pipeline = load_pipeline()
+     return "Model loaded successfully!"
+
+ @spaces.GPU(duration=180)
+ def generate_video(
+     prompt: str,
+     height: int = DEFAULT_HEIGHT,
+     width: int = DEFAULT_WIDTH,
+     num_frames: int = DEFAULT_NUM_FRAMES,
+     num_inference_steps: int = DEFAULT_NUM_INFERENCE_STEPS,
+     seed: Optional[int] = None,
+     progress=gr.Progress()
+ ) -> Tuple[str, str]:
+     """
+     Generate a video from a text prompt using Open-Sora-v2.
+
+     Args:
+         prompt (str): Text description of the video to generate
+         height (int): Height of the video frames
+         width (int): Width of the video frames
+         num_frames (int): Number of frames to generate
+         num_inference_steps (int): Number of denoising steps
+         seed (int, optional): Random seed for reproducible generation
+
+     Returns:
+         Tuple[str, str]: Path to the generated video file and a status message
+     """
+     try:
+         # Initialize the model if not already done
+         if pipeline is None:
+             progress(0.1, desc="Loading model...")
+             initialize_model()
+
+         # Set seed for reproducibility
+         if seed is not None:
+             torch.manual_seed(seed)
+
+         progress(0.2, desc="Generating video frames...")
+
+         # Generate video frames; diffusers video pipelines return batched
+         # output, so take the first (and only) sample
+         video_frames = pipeline(
+             prompt=prompt,
+             height=height,
+             width=width,
+             num_frames=num_frames,
+             num_inference_steps=num_inference_steps,
+             guidance_scale=7.5,
+         ).frames[0]
+
+         progress(0.8, desc="Processing video...")
+
+         # Convert frames to a video file
+         video_path = save_video_temp(video_frames, fps=24)
+
+         progress(1.0, desc="Complete!")
+
+         return video_path, f"✅ Video generated successfully! ({len(video_frames)} frames)"
+
+     except Exception as e:
+         error_msg = f"❌ Error generating video: {str(e)}"
+         return None, error_msg
+
+ def update_interface():
+     """Update interface based on model availability"""
+     return gr.update(interactive=True)
+
+ def create_demo():
+     """Create the Gradio demo interface"""
+
+     with gr.Blocks(
+         title="Open-Sora-v2 Text to Video",
+         theme=gr.themes.Soft(),
+         css="""
+         .gradio-container {
+             max-width: 1200px !important;
+         }
+         .generate-btn {
+             background: linear-gradient(45deg, #667eea 0%, #764ba2 100%) !important;
+         }
+         """
+     ) as demo:
+
+         gr.HTML("""
+         <div style="text-align: center; margin-bottom: 20px;">
+             <h1>🎬 Open-Sora-v2 Text to Video Generator</h1>
+             <p>Generate videos from text descriptions using the Open-Sora-v2 model</p>
+             <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
+         </div>
+         """)
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 # Input section
+                 gr.Markdown("## 📝 Input")
+
+                 prompt_input = gr.Textbox(
+                     label="Video Description",
+                     placeholder="Describe the video you want to generate...",
+                     lines=3,
+                     value="A beautiful sunset over the ocean with waves gently rolling"
+                 )
+
+                 with gr.Accordion("⚙️ Advanced Settings", open=False):
+                     with gr.Row():
+                         height_input = gr.Number(
+                             label="Height",
+                             value=DEFAULT_HEIGHT,
+                             minimum=256,
+                             maximum=1024,
+                             step=64
+                         )
+                         width_input = gr.Number(
+                             label="Width",
+                             value=DEFAULT_WIDTH,
+                             minimum=256,
+                             maximum=1024,
+                             step=64
+                         )
+
+                     with gr.Row():
+                         num_frames_input = gr.Slider(
+                             label="Number of Frames",
+                             value=DEFAULT_NUM_FRAMES,
+                             minimum=16,
+                             maximum=120,
+                             step=8
+                         )
+                         num_steps_input = gr.Slider(
+                             label="Inference Steps",
+                             value=DEFAULT_NUM_INFERENCE_STEPS,
+                             minimum=10,
+                             maximum=100,
+                             step=5
+                         )
+
+                     seed_input = gr.Number(
+                         label="Seed (optional)",
+                         value=None,
+                         precision=0
+                     )
+
+                 generate_btn = gr.Button(
+                     "🎥 Generate Video",
+                     variant="primary",
+                     size="lg",
+                     elem_classes=["generate-btn"]
+                 )
+
+             with gr.Column(scale=1):
+                 # Output section
+                 gr.Markdown("## 🎥 Output")
+
+                 video_output = gr.Video(
+                     label="Generated Video",
+                     height=400
+                 )
+
+                 status_output = gr.Textbox(
+                     label="Status",
+                     interactive=False
+                 )
+
+         # Example prompts
+         gr.Markdown("## 💡 Example Prompts")
+
+         examples = [
+             "A majestic eagle soaring through mountain peaks at sunrise",
+             "A busy city street with neon lights at night, cyberpunk style",
+             "A peaceful garden with butterflies fluttering around colorful flowers",
+             "A robot dancing in a futuristic disco with colorful lights",
+             "A serene lake reflecting autumn trees with falling leaves"
+         ]
+
+         with gr.Row():
+             for example in examples:
+                 example_btn = gr.Button(example, size="sm")
+                 # Bind each button to its own prompt via a default argument
+                 example_btn.click(
+                     lambda x=example: x,
+                     outputs=prompt_input
+                 )
+
+         # Event handlers
+         generate_btn.click(
+             fn=generate_video,
+             inputs=[
+                 prompt_input,
+                 height_input,
+                 width_input,
+                 num_frames_input,
+                 num_steps_input,
+                 seed_input
+             ],
+             outputs=[video_output, status_output],
+             show_progress=True
+         )
+
+         # Initialize the model on startup
+         demo.load(
+             fn=initialize_model,
+             outputs=[status_output]
+         )
+
+         # Clean up temp files when the page closes
+         demo.unload(
+             fn=cleanup_temp_files
+         )
+
+     return demo
+
+ if __name__ == "__main__":
+     demo = create_demo()
+     demo.launch(
+         share=True,
+         show_error=True,
+         show_api=True
+     )
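
Because the app launches with show_api=True, the generation endpoint can also be driven programmatically. Below is a minimal sketch using gradio_client; the Space id and the /generate_video endpoint name are assumptions (the endpoint name is inferred from the function name above), not part of this commit.

# Sketch: drive the deployed Space through its auto-generated API.
# The Space id below is hypothetical.
from gradio_client import Client

client = Client("AverageAiLiker/open-sora-v2")  # hypothetical Space id
video_path, status = client.predict(
    "A beautiful sunset over the ocean",  # prompt
    320,   # height
    576,   # width
    64,    # num_frames
    50,    # num_inference_steps
    42,    # seed
    api_name="/generate_video",  # assumed from the function name
)
print(status, video_path)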
config.py ADDED
@@ -0,0 +1,26 @@
+ import os
+ import tempfile
+
+ # Model configuration
+ MODEL_ID = "hpcai-tech/Open-Sora-v2"
+
+ # Default video generation parameters
+ DEFAULT_HEIGHT = 320
+ DEFAULT_WIDTH = 576
+ DEFAULT_NUM_FRAMES = 64
+ DEFAULT_NUM_INFERENCE_STEPS = 50
+
+ # UI configuration
+ MAX_PROMPT_LENGTH = 1000
+ MIN_HEIGHT = 256
+ MAX_HEIGHT = 1024
+ MIN_WIDTH = 256
+ MAX_WIDTH = 1024
+ MIN_FRAMES = 16
+ MAX_FRAMES = 120
+ MIN_STEPS = 10
+ MAX_STEPS = 100
+
+ # File paths (os and tempfile imports above are required here)
+ TEMP_DIR = tempfile.gettempdir()
+ VIDEO_DIR = os.path.join(TEMP_DIR, "opensora_videos")
+
+ # Ensure the video directory exists
+ os.makedirs(VIDEO_DIR, exist_ok=True)
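
The defaults above are hard-coded; if they need to vary per deployment, one hedged option is environment-variable overrides. The variable names below are assumptions and nothing in this commit reads them:

# Sketch: optional env-var overrides for the constants above.
# OPENSORA_* names are hypothetical.
import os

MODEL_ID = os.environ.get("OPENSORA_MODEL_ID", "hpcai-tech/Open-Sora-v2")
DEFAULT_NUM_FRAMES = int(os.environ.get("OPENSORA_NUM_FRAMES", "64"))
DEFAULT_NUM_INFERENCE_STEPS = int(os.environ.get("OPENSORA_STEPS", "50"))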
models.py ADDED
@@ -0,0 +1,68 @@
+ import torch
+ from diffusers import DiffusionPipeline
+ import spaces
+ from config import MODEL_ID
+
+ def load_pipeline():
+     """
+     Load and configure the Open-Sora-v2 pipeline
+     """
+     try:
+         # Load the pipeline with fp16 weights to reduce memory usage
+         pipeline = DiffusionPipeline.from_pretrained(
+             MODEL_ID,
+             torch_dtype=torch.float16,
+             variant="fp16",
+             use_safetensors=True
+         )
+
+         if torch.cuda.is_available():
+             # Enable memory-efficient attention if xformers is installed
+             try:
+                 pipeline.enable_xformers_memory_efficient_attention()
+             except Exception:
+                 print("xformers not available, using default attention")
+
+             # enable_model_cpu_offload manages device placement itself,
+             # so no explicit .to("cuda") is needed alongside it
+             pipeline.enable_model_cpu_offload()
+
+         return pipeline
+
+     except Exception as e:
+         print(f"Error loading pipeline: {e}")
+         raise
+
+ @spaces.GPU(duration=1500)
+ def compile_transformer():
+     """
+     Optional: compile the transformer ahead of time for better performance.
+     This is experimental and may not work with all models.
+     """
+     try:
+         pipeline = load_pipeline()
+
+         # Capture example inputs for the transformer
+         with spaces.aoti_capture(pipeline.transformer) as call:
+             pipeline("test prompt generation")
+
+         # Export the transformer with the captured inputs
+         exported = torch.export.export(
+             pipeline.transformer,
+             args=call.args,
+             kwargs=call.kwargs,
+         )
+
+         # Compile the exported model
+         compiled_transformer = spaces.aoti_compile(exported)
+
+         # Apply the compiled model back onto the pipeline
+         spaces.aoti_apply(compiled_transformer, pipeline.transformer)
+
+         return pipeline
+
+     except Exception as e:
+         print(f"Compilation failed, using unoptimized model: {e}")
+         return load_pipeline()
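
Note that compile_transformer is defined but never called from app.py. If ahead-of-time compilation is wanted, one way to opt in at load time is sketched below; COMPILE_TRANSFORMER is a hypothetical flag, and whether Open-Sora-v2's transformer survives torch.export is untested here.

# Sketch: choose the compiled pipeline at startup, falling back on failure.
# COMPILE_TRANSFORMER is a hypothetical flag, not part of this commit.
import os

def load_pipeline_maybe_compiled():
    if os.environ.get("COMPILE_TRANSFORMER") == "1":
        return compile_transformer()  # already falls back to load_pipeline()
    return load_pipeline()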
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ gradio
+ torch
+ diffusers
+ transformers
+ accelerate
+ imageio
+ imageio-ffmpeg
+ Pillow
+ numpy
+ spaces
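
imageio-ffmpeg is added above because imageio needs it to write mp4 files (utils.py falls back to GIF without it). All dependencies are otherwise unpinned, so a Space rebuild can pull breaking releases. A small sketch that snapshots the versions of a working environment (run locally, then paste the output into requirements.txt):

# Sketch: print pinned requirements for the packages this app uses.
from importlib.metadata import version, PackageNotFoundError

packages = ["gradio", "torch", "diffusers", "transformers", "accelerate",
            "imageio", "imageio-ffmpeg", "Pillow", "numpy", "spaces"]
for name in packages:
    try:
        print(f"{name}=={version(name)}")
    except PackageNotFoundError:
        print(f"# {name} not installed")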
utils.py ADDED
@@ -0,0 +1,146 @@
+ import numpy as np
+ import tempfile
+ import os
+ import time
+ import imageio
+ from PIL import Image
+ from typing import List
+ import shutil
+
+ def create_video_from_frames(frames: List[np.ndarray], fps: int = 24) -> str:
+     """
+     Create a video file from a list of frames
+
+     Args:
+         frames (List[np.ndarray]): List of video frames as numpy arrays
+         fps (int): Frames per second for the output video
+
+     Returns:
+         str: Path to the generated video file
+     """
+     if not frames:
+         raise ValueError("No frames provided")
+
+     # Create a temporary directory; the prefix lets cleanup_temp_files
+     # find and remove these directories later
+     temp_dir = tempfile.mkdtemp(prefix="generated_video_")
+     video_path = os.path.join(temp_dir, "generated_video.mp4")
+
+     # Ensure frames are in the right format
+     processed_frames = []
+     for frame in frames:
+         if isinstance(frame, np.ndarray):
+             # Convert float arrays in [0, 1] to uint8
+             if frame.dtype != np.uint8:
+                 frame = (frame * 255).astype(np.uint8)
+             if len(frame.shape) == 3 and frame.shape[2] == 3:
+                 # RGB image
+                 pil_image = Image.fromarray(frame, mode='RGB')
+             elif len(frame.shape) == 3 and frame.shape[2] == 4:
+                 # RGBA image
+                 pil_image = Image.fromarray(frame, mode='RGBA')
+             else:
+                 # Grayscale
+                 pil_image = Image.fromarray(frame, mode='L')
+         else:
+             # Assume it's already a PIL Image
+             pil_image = frame
+
+         processed_frames.append(pil_image)
+
+     # Save as video (mp4 writing requires the imageio-ffmpeg plugin)
+     with imageio.get_writer(video_path, fps=fps) as writer:
+         for frame in processed_frames:
+             # Convert PIL back to a numpy array for the writer
+             writer.append_data(np.array(frame))
+
+     return video_path
+
+ def save_video_temp(frames: List, fps: int = 24) -> str:
+     """
+     Save video frames to a temporary file
+
+     Args:
+         frames (List): List of video frames
+         fps (int): Frames per second
+
+     Returns:
+         str: Path to the saved video file
+     """
+     try:
+         return create_video_from_frames(frames, fps)
+     except Exception as e:
+         # Fallback: save as an animated GIF if video creation fails
+         print(f"Video creation failed ({e}), falling back to GIF")
+         temp_dir = tempfile.mkdtemp(prefix="generated_video_")
+         gif_path = os.path.join(temp_dir, "generated_video.gif")
+
+         # Convert frames to PIL Images and save as GIF
+         pil_frames = []
+         for frame in frames:
+             if isinstance(frame, np.ndarray):
+                 if frame.dtype != np.uint8:
+                     frame = (frame * 255).astype(np.uint8)
+                 pil_frame = Image.fromarray(frame)
+             else:
+                 pil_frame = frame
+             pil_frames.append(pil_frame)
+
+         if pil_frames:
+             pil_frames[0].save(
+                 gif_path,
+                 save_all=True,
+                 append_images=pil_frames[1:],
+                 duration=1000 // fps,
+                 loop=0
+             )
+             return gif_path
+         else:
+             raise ValueError("No valid frames to save")
+
+ def cleanup_temp_files():
+     """Clean up generated-video temp directories older than one hour"""
+     temp_dir = tempfile.gettempdir()
+     current_time = time.time()
+     # Outputs live in mkdtemp directories prefixed "generated_video_",
+     # so matching on that prefix finds them
+     for filename in os.listdir(temp_dir):
+         if filename.startswith("generated_video"):
+             file_path = os.path.join(temp_dir, filename)
+             try:
+                 if os.path.getmtime(file_path) < current_time - 3600:
+                     if os.path.isfile(file_path):
+                         os.unlink(file_path)
+                     elif os.path.isdir(file_path):
+                         shutil.rmtree(file_path)
+             except Exception:
+                 pass
+
+ def validate_prompt(prompt: str) -> bool:
+     """
+     Validate that the prompt is non-empty and of reasonable length
+
+     Args:
+         prompt (str): Input prompt
+
+     Returns:
+         bool: True if the prompt is valid
+     """
+     if not prompt or not prompt.strip():
+         return False
+     if len(prompt.strip()) < 3:
+         return False
+     if len(prompt.strip()) > 1000:
+         return False
+     return True
+
+ def format_status_message(message: str, success: bool = True) -> str:
+     """
+     Format a status message with an appropriate emoji
+
+     Args:
+         message (str): Status message
+         success (bool): Whether the operation was successful
+
+     Returns:
+         str: Formatted status message
+     """
+     emoji = "✅" if success else "❌"
+     return f"{emoji} {message}"
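
The video helpers can be sanity-checked locally without loading the model. A minimal smoke test with synthetic frames (a sketch; assumes imageio-ffmpeg is installed for mp4 writing, and note cleanup_temp_files only removes outputs older than an hour, so it is a no-op right after this run):

# Sketch: smoke test for create_video_from_frames with random frames.
import numpy as np
from utils import create_video_from_frames, cleanup_temp_files

frames = [np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
          for _ in range(24)]
path = create_video_from_frames(frames, fps=12)
print(f"Wrote {path}")
cleanup_temp_files()  # removes generated_video_* dirs older than 1 hour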