AverageAiLiker committed
Commit 838951c (verified)
Parent: b4dbf40

Deploy Gradio app with multiple files

Files changed (5):
  1. app.py +239 -0
  2. config.py +26 -0
  3. models.py +68 -0
  4. requirements.txt +9 -0
  5. utils.py +146 -0
app.py ADDED
@@ -0,0 +1,239 @@
+ import gradio as gr
+ import spaces
+ import torch
+ from diffusers import DiffusionPipeline
+ import numpy as np
+ from PIL import Image
+ import os
+ import tempfile
+ from typing import Optional, Tuple
+ import time
+
+ from config import MODEL_ID, DEFAULT_HEIGHT, DEFAULT_WIDTH, DEFAULT_NUM_FRAMES, DEFAULT_NUM_INFERENCE_STEPS
+ from utils import create_video_from_frames, save_video_temp, cleanup_temp_files
+ from models import load_pipeline
+
+ # Global pipeline variable
+ pipeline = None
+
+ @spaces.GPU(duration=300)
+ def initialize_model():
+     """Initialize the Open-Sora-v2 pipeline"""
+     global pipeline
+     if pipeline is None:
+         pipeline = load_pipeline()
+     return "Model loaded successfully!"
+
+ @spaces.GPU(duration=180)
+ def generate_video(
+     prompt: str,
+     height: int = DEFAULT_HEIGHT,
+     width: int = DEFAULT_WIDTH,
+     num_frames: int = DEFAULT_NUM_FRAMES,
+     num_inference_steps: int = DEFAULT_NUM_INFERENCE_STEPS,
+     seed: Optional[int] = None,
+     progress=gr.Progress()
+ ) -> Tuple[str, str]:
+     """
+     Generate a video from a text prompt using Open-Sora-v2.
+
+     Args:
+         prompt (str): Text description of the video to generate
+         height (int): Height of the video frames
+         width (int): Width of the video frames
+         num_frames (int): Number of frames to generate
+         num_inference_steps (int): Number of denoising steps
+         seed (int, optional): Random seed for reproducible generation
+
+     Returns:
+         Tuple[str, str]: Path to the generated video file and a status message
+     """
+     try:
+         # Initialize the model if not already done
+         if pipeline is None:
+             progress(0.1, desc="Loading model...")
+             initialize_model()
+
+         # Set seed for reproducibility
+         if seed is not None:
+             torch.manual_seed(seed)
+
+         progress(0.2, desc="Generating video frames...")
+
+         # Generate video frames; diffusers video pipelines return batched
+         # output, so take the first (and only) sample
+         video_frames = pipeline(
+             prompt=prompt,
+             height=height,
+             width=width,
+             num_frames=num_frames,
+             num_inference_steps=num_inference_steps,
+             guidance_scale=7.5,
+         ).frames[0]
+
+         progress(0.8, desc="Processing video...")
+
+         # Convert frames to a video file
+         video_path = save_video_temp(video_frames, fps=24)
+
+         progress(1.0, desc="Complete!")
+
+         return video_path, f"✅ Video generated successfully! ({len(video_frames)} frames)"
+
+     except Exception as e:
+         error_msg = f"❌ Error generating video: {str(e)}"
+         return None, error_msg
+
+ def update_interface():
+     """Update interface based on model availability"""
+     return gr.update(interactive=True)
+
+ def create_demo():
+     """Create the Gradio demo interface"""
+
+     with gr.Blocks(
+         title="Open-Sora-v2 Text to Video",
+         theme=gr.themes.Soft(),
+         css="""
+         .gradio-container {
+             max-width: 1200px !important;
+         }
+         .generate-btn {
+             background: linear-gradient(45deg, #667eea 0%, #764ba2 100%) !important;
+         }
+         """
+     ) as demo:
+
+         gr.HTML("""
+         <div style="text-align: center; margin-bottom: 20px;">
+             <h1>🎬 Open-Sora-v2 Text to Video Generator</h1>
+             <p>Generate videos from text descriptions using the Open-Sora-v2 model</p>
+             <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
+         </div>
+         """)
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 # Input section
+                 gr.Markdown("## 📝 Input")
+
+                 prompt_input = gr.Textbox(
+                     label="Video Description",
+                     placeholder="Describe the video you want to generate...",
+                     lines=3,
+                     value="A beautiful sunset over the ocean with waves gently rolling"
+                 )
+
+                 with gr.Accordion("⚙️ Advanced Settings", open=False):
+                     with gr.Row():
+                         height_input = gr.Number(
+                             label="Height",
+                             value=DEFAULT_HEIGHT,
+                             minimum=256,
+                             maximum=1024,
+                             step=64
+                         )
+                         width_input = gr.Number(
+                             label="Width",
+                             value=DEFAULT_WIDTH,
+                             minimum=256,
+                             maximum=1024,
+                             step=64
+                         )
+
+                     with gr.Row():
+                         num_frames_input = gr.Slider(
+                             label="Number of Frames",
+                             value=DEFAULT_NUM_FRAMES,
+                             minimum=16,
+                             maximum=120,
+                             step=8
+                         )
+                         num_steps_input = gr.Slider(
+                             label="Inference Steps",
+                             value=DEFAULT_NUM_INFERENCE_STEPS,
+                             minimum=10,
+                             maximum=100,
+                             step=5
+                         )
+
+                     seed_input = gr.Number(
+                         label="Seed (optional)",
+                         value=None,
+                         precision=0
+                     )
+
+                 generate_btn = gr.Button(
+                     "🎥 Generate Video",
+                     variant="primary",
+                     size="lg",
+                     elem_classes=["generate-btn"]
+                 )
+
+             with gr.Column(scale=1):
+                 # Output section
+                 gr.Markdown("## 🎥 Output")
+
+                 video_output = gr.Video(
+                     label="Generated Video",
+                     height=400
+                 )
+
+                 status_output = gr.Textbox(
+                     label="Status",
+                     interactive=False
+                 )
+
+         # Example prompts
+         gr.Markdown("## 💡 Example Prompts")
+
+         examples = [
+             "A majestic eagle soaring through mountain peaks at sunrise",
+             "A busy city street with neon lights at night, cyberpunk style",
+             "A peaceful garden with butterflies fluttering around colorful flowers",
+             "A robot dancing in a futuristic disco with colorful lights",
+             "A serene lake reflecting autumn trees with falling leaves"
+         ]
+
+         with gr.Row():
+             for example in examples:
+                 example_btn = gr.Button(example, size="sm")
+                 # Bind each button to its own prompt via a default argument
+                 example_btn.click(
+                     lambda x=example: x,
+                     outputs=prompt_input
+                 )
+
+         # Event handlers
+         generate_btn.click(
+             fn=generate_video,
+             inputs=[
+                 prompt_input,
+                 height_input,
+                 width_input,
+                 num_frames_input,
+                 num_steps_input,
+                 seed_input
+             ],
+             outputs=[video_output, status_output],
+             show_progress=True
+         )
+
+         # Initialize the model on startup
+         demo.load(
+             fn=initialize_model,
+             outputs=[status_output]
+         )
+
+         # Clean up temp files when the page closes
+         demo.unload(
+             fn=cleanup_temp_files
+         )
+
+     return demo
+
+ if __name__ == "__main__":
+     demo = create_demo()
+     demo.launch(
+         share=True,
+         show_error=True,
+         show_api=True
+     )
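
Because the app launches with show_api=True, the generation endpoint can also be driven programmatically. Below is a minimal sketch using gradio_client; the Space id and the /generate_video endpoint name are assumptions (the endpoint name is inferred from the function name above), not part of this commit.

# Sketch: drive the deployed Space through its auto-generated API.
# The Space id below is hypothetical.
from gradio_client import Client

client = Client("AverageAiLiker/open-sora-v2")  # hypothetical Space id
video_path, status = client.predict(
    "A beautiful sunset over the ocean",  # prompt
    320,   # height
    576,   # width
    64,    # num_frames
    50,    # num_inference_steps
    42,    # seed
    api_name="/generate_video",  # assumed from the function name
)
print(status, video_path)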
config.py ADDED
@@ -0,0 +1,26 @@
+ import os
+ import tempfile
+
+ # Model configuration
+ MODEL_ID = "hpcai-tech/Open-Sora-v2"
+
+ # Default video generation parameters
+ DEFAULT_HEIGHT = 320
+ DEFAULT_WIDTH = 576
+ DEFAULT_NUM_FRAMES = 64
+ DEFAULT_NUM_INFERENCE_STEPS = 50
+
+ # UI configuration
+ MAX_PROMPT_LENGTH = 1000
+ MIN_HEIGHT = 256
+ MAX_HEIGHT = 1024
+ MIN_WIDTH = 256
+ MAX_WIDTH = 1024
+ MIN_FRAMES = 16
+ MAX_FRAMES = 120
+ MIN_STEPS = 10
+ MAX_STEPS = 100
+
+ # File paths (os and tempfile imports above are required here)
+ TEMP_DIR = tempfile.gettempdir()
+ VIDEO_DIR = os.path.join(TEMP_DIR, "opensora_videos")
+
+ # Ensure the video directory exists
+ os.makedirs(VIDEO_DIR, exist_ok=True)
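
The defaults above are hard-coded; if they need to vary per deployment, one hedged option is environment-variable overrides. The variable names below are assumptions and nothing in this commit reads them:

# Sketch: optional env-var overrides for the constants above.
# OPENSORA_* names are hypothetical.
import os

MODEL_ID = os.environ.get("OPENSORA_MODEL_ID", "hpcai-tech/Open-Sora-v2")
DEFAULT_NUM_FRAMES = int(os.environ.get("OPENSORA_NUM_FRAMES", "64"))
DEFAULT_NUM_INFERENCE_STEPS = int(os.environ.get("OPENSORA_STEPS", "50"))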
models.py ADDED
@@ -0,0 +1,68 @@
+ import torch
+ from diffusers import DiffusionPipeline
+ import spaces
+ from config import MODEL_ID
+
+ def load_pipeline():
+     """
+     Load and configure the Open-Sora-v2 pipeline
+     """
+     try:
+         # Load the pipeline with fp16 weights to reduce memory usage
+         pipeline = DiffusionPipeline.from_pretrained(
+             MODEL_ID,
+             torch_dtype=torch.float16,
+             variant="fp16",
+             use_safetensors=True
+         )
+
+         if torch.cuda.is_available():
+             # Enable memory-efficient attention if xformers is installed
+             try:
+                 pipeline.enable_xformers_memory_efficient_attention()
+             except Exception:
+                 print("xformers not available, using default attention")
+
+             # enable_model_cpu_offload manages device placement itself,
+             # so no explicit .to("cuda") is needed alongside it
+             pipeline.enable_model_cpu_offload()
+
+         return pipeline
+
+     except Exception as e:
+         print(f"Error loading pipeline: {e}")
+         raise
+
+ @spaces.GPU(duration=1500)
+ def compile_transformer():
+     """
+     Optional: compile the transformer ahead of time for better performance.
+     This is experimental and may not work with all models.
+     """
+     try:
+         pipeline = load_pipeline()
+
+         # Capture example inputs for the transformer
+         with spaces.aoti_capture(pipeline.transformer) as call:
+             pipeline("test prompt generation")
+
+         # Export the transformer with the captured inputs
+         exported = torch.export.export(
+             pipeline.transformer,
+             args=call.args,
+             kwargs=call.kwargs,
+         )
+
+         # Compile the exported model
+         compiled_transformer = spaces.aoti_compile(exported)
+
+         # Apply the compiled model back onto the pipeline
+         spaces.aoti_apply(compiled_transformer, pipeline.transformer)
+
+         return pipeline
+
+     except Exception as e:
+         print(f"Compilation failed, using unoptimized model: {e}")
+         return load_pipeline()
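
Note that compile_transformer is defined but never called from app.py. If ahead-of-time compilation is wanted, one way to opt in at load time is sketched below; COMPILE_TRANSFORMER is a hypothetical flag, and whether Open-Sora-v2's transformer survives torch.export is untested here.

# Sketch: choose the compiled pipeline at startup, falling back on failure.
# COMPILE_TRANSFORMER is a hypothetical flag, not part of this commit.
import os

def load_pipeline_maybe_compiled():
    if os.environ.get("COMPILE_TRANSFORMER") == "1":
        return compile_transformer()  # already falls back to load_pipeline()
    return load_pipeline()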
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ gradio
+ torch
+ diffusers
+ transformers
+ accelerate
+ imageio
+ imageio-ffmpeg
+ Pillow
+ numpy
+ spaces
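
imageio-ffmpeg is added above because imageio needs it to write mp4 files (utils.py falls back to GIF without it). All dependencies are otherwise unpinned, so a Space rebuild can pull breaking releases. A small sketch that snapshots the versions of a working environment (run locally, then paste the output into requirements.txt):

# Sketch: print pinned requirements for the packages this app uses.
from importlib.metadata import version, PackageNotFoundError

packages = ["gradio", "torch", "diffusers", "transformers", "accelerate",
            "imageio", "imageio-ffmpeg", "Pillow", "numpy", "spaces"]
for name in packages:
    try:
        print(f"{name}=={version(name)}")
    except PackageNotFoundError:
        print(f"# {name} not installed")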
utils.py ADDED
@@ -0,0 +1,146 @@
+ import numpy as np
+ import tempfile
+ import os
+ import time
+ import imageio
+ from PIL import Image
+ from typing import List
+ import shutil
+
+ def create_video_from_frames(frames: List[np.ndarray], fps: int = 24) -> str:
+     """
+     Create a video file from a list of frames
+
+     Args:
+         frames (List[np.ndarray]): List of video frames as numpy arrays
+         fps (int): Frames per second for the output video
+
+     Returns:
+         str: Path to the generated video file
+     """
+     if not frames:
+         raise ValueError("No frames provided")
+
+     # Create a temporary directory; the prefix lets cleanup_temp_files
+     # find and remove these directories later
+     temp_dir = tempfile.mkdtemp(prefix="generated_video_")
+     video_path = os.path.join(temp_dir, "generated_video.mp4")
+
+     # Ensure frames are in the right format
+     processed_frames = []
+     for frame in frames:
+         if isinstance(frame, np.ndarray):
+             # Convert float arrays in [0, 1] to uint8
+             if frame.dtype != np.uint8:
+                 frame = (frame * 255).astype(np.uint8)
+             if len(frame.shape) == 3 and frame.shape[2] == 3:
+                 # RGB image
+                 pil_image = Image.fromarray(frame, mode='RGB')
+             elif len(frame.shape) == 3 and frame.shape[2] == 4:
+                 # RGBA image
+                 pil_image = Image.fromarray(frame, mode='RGBA')
+             else:
+                 # Grayscale
+                 pil_image = Image.fromarray(frame, mode='L')
+         else:
+             # Assume it's already a PIL Image
+             pil_image = frame
+
+         processed_frames.append(pil_image)
+
+     # Save as video (mp4 writing requires the imageio-ffmpeg plugin)
+     with imageio.get_writer(video_path, fps=fps) as writer:
+         for frame in processed_frames:
+             # Convert PIL back to a numpy array for the writer
+             writer.append_data(np.array(frame))
+
+     return video_path
+
+ def save_video_temp(frames: List, fps: int = 24) -> str:
+     """
+     Save video frames to a temporary file
+
+     Args:
+         frames (List): List of video frames
+         fps (int): Frames per second
+
+     Returns:
+         str: Path to the saved video file
+     """
+     try:
+         return create_video_from_frames(frames, fps)
+     except Exception as e:
+         # Fallback: save as an animated GIF if video creation fails
+         print(f"Video creation failed ({e}), falling back to GIF")
+         temp_dir = tempfile.mkdtemp(prefix="generated_video_")
+         gif_path = os.path.join(temp_dir, "generated_video.gif")
+
+         # Convert frames to PIL Images and save as GIF
+         pil_frames = []
+         for frame in frames:
+             if isinstance(frame, np.ndarray):
+                 if frame.dtype != np.uint8:
+                     frame = (frame * 255).astype(np.uint8)
+                 pil_frame = Image.fromarray(frame)
+             else:
+                 pil_frame = frame
+             pil_frames.append(pil_frame)
+
+         if pil_frames:
+             pil_frames[0].save(
+                 gif_path,
+                 save_all=True,
+                 append_images=pil_frames[1:],
+                 duration=1000 // fps,
+                 loop=0
+             )
+             return gif_path
+         else:
+             raise ValueError("No valid frames to save")
+
+ def cleanup_temp_files():
+     """Clean up generated-video temp directories older than one hour"""
+     temp_dir = tempfile.gettempdir()
+     current_time = time.time()
+     # Outputs live in mkdtemp directories prefixed "generated_video_",
+     # so matching on that prefix finds them
+     for filename in os.listdir(temp_dir):
+         if filename.startswith("generated_video"):
+             file_path = os.path.join(temp_dir, filename)
+             try:
+                 if os.path.getmtime(file_path) < current_time - 3600:
+                     if os.path.isfile(file_path):
+                         os.unlink(file_path)
+                     elif os.path.isdir(file_path):
+                         shutil.rmtree(file_path)
+             except Exception:
+                 pass
+
+ def validate_prompt(prompt: str) -> bool:
+     """
+     Validate that the prompt is non-empty and of reasonable length
+
+     Args:
+         prompt (str): Input prompt
+
+     Returns:
+         bool: True if the prompt is valid
+     """
+     if not prompt or not prompt.strip():
+         return False
+     if len(prompt.strip()) < 3:
+         return False
+     if len(prompt.strip()) > 1000:
+         return False
+     return True
+
+ def format_status_message(message: str, success: bool = True) -> str:
+     """
+     Format a status message with an appropriate emoji
+
+     Args:
+         message (str): Status message
+         success (bool): Whether the operation was successful
+
+     Returns:
+         str: Formatted status message
+     """
+     emoji = "✅" if success else "❌"
+     return f"{emoji} {message}"
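
The video helpers can be sanity-checked locally without loading the model. A minimal smoke test with synthetic frames (a sketch; assumes imageio-ffmpeg is installed for mp4 writing, and note cleanup_temp_files only removes outputs older than an hour, so it is a no-op right after this run):

# Sketch: smoke test for create_video_from_frames with random frames.
import numpy as np
from utils import create_video_from_frames, cleanup_temp_files

frames = [np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
          for _ in range(24)]
path = create_video_from_frames(frames, fps=12)
print(f"Wrote {path}")
cleanup_temp_files()  # removes generated_video_* dirs older than 1 hour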