AshBlanc committed
Commit 9da0f47 · 1 Parent(s): 581dc21

Add application file

Files changed (1): app.py (+66 -0)
app.py ADDED
@@ -0,0 +1,66 @@
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from diffusers import WanPipeline
from diffusers.utils import export_to_video
import torch
import tempfile
import os

# Music generation lives in a separate module (music.py, not included in this commit)
from music import generate_music

# Load the SmolLM3-3B script model once at startup.
# NOTE: checkpoint id assumed here; the originally referenced
# "cognitivecomputations/smolllm-3b" does not match a known repo.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM3-3B")
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM3-3B")

# Load the Wan2.1 1.3B text-to-video model once at startup.
# NOTE: Wan2.1 is served by WanPipeline, not StableVideoDiffusionPipeline
# (which is image-to-video); the Diffusers checkpoint id below is assumed.
video_pipe = WanPipeline.from_pretrained(
    "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", torch_dtype=torch.float16
)
# enable_model_cpu_offload() manages device placement itself, so the pipeline
# is not moved to CUDA explicitly; sub-models are offloaded to CPU between calls.
video_pipe.enable_model_cpu_offload()

def generate_scenes_with_smol(script, style):
    prompt = (
        f"Break this {style.lower()} script into cinematic scenes with camera angles, "
        f"characters, and mood.\nScript: {script}\nScene Breakdown:"
    )
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=512)
    # Decode only the newly generated tokens, not the echoed prompt
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    decoded = tokenizer.decode(generated, skip_special_tokens=True)
    # One scene per non-empty line of the generated breakdown
    scenes = [
        {"scene_id": i + 1, "description": scene.strip()}
        for i, scene in enumerate(decoded.split("\n"))
        if scene.strip()
    ]
    return scenes

def generate_video_with_wan(prompt):
    # .frames[0] is the frame list for the first (and only) video in the batch;
    # frame count rounded to 25 since Wan expects 4k+1 frames
    frames = video_pipe(prompt=prompt, num_frames=25, height=512, width=512).frames[0]
    tmp_dir = tempfile.mkdtemp()
    output_path = os.path.join(tmp_dir, "scene.mp4")
    export_to_video(frames, output_path, fps=8)  # write the frame list out as an MP4 clip
    return output_path

def process_script(script, style, want_music):
    scenes = generate_scenes_with_smol(script, style)
    video_clips = []

    for scene in scenes:
        text_prompt = scene['description']
        video_path = generate_video_with_wan(text_prompt)
        video_clips.append((scene['description'], video_path))

    music_path = generate_music(script) if want_music else None
    return video_clips, music_path

with gr.Blocks() as app:
    gr.Markdown("# 🎮 Vividly MVP – AI Video Creator")

    with gr.Row():
        script_input = gr.Textbox(label="Video Script", lines=6)
        style_input = gr.Dropdown(["Cinematic", "Vlog", "Explainer"], value="Cinematic", label="Video Style")
        music_toggle = gr.Checkbox(label="Generate background music")

    submit_btn = gr.Button("Generate")

    video_outputs = gr.Video(label="Scene-wise Video Clips", interactive=False, visible=False)
    music_player = gr.Audio(label="Generated Music", visible=False)

    def wrap(script, style, music):
        scenes, music_path = process_script(script, style, music)
        # MVP behaviour: only the first generated clip is surfaced in the player
        first_video = scenes[0][1] if scenes else None
        return (
            gr.update(value=first_video, visible=True),
            gr.update(value=music_path, visible=music_path is not None),
        )

    submit_btn.click(wrap, inputs=[script_input, style_input, music_toggle], outputs=[video_outputs, music_player])

app.launch()
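
app.py imports generate_music from a music module that is not part of this commit. For reference, below is a minimal, hypothetical sketch of what music.py could look like, using MusicGen (facebook/musicgen-small) through transformers; the model choice, prompt wording, and WAV output path are assumptions, not the committed implementation.

# music.py -- hypothetical sketch of the module imported by app.py (not in this commit).
# Assumes MusicGen via transformers; model id, prompt, and signature are illustrative only.
import os
import tempfile

import scipy.io.wavfile
from transformers import AutoProcessor, MusicgenForConditionalGeneration

_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

def generate_music(script: str) -> str:
    """Generate a short background track conditioned on the script and return a WAV path."""
    prompt = f"Background music for this video: {script[:200]}"
    inputs = _processor(text=[prompt], padding=True, return_tensors="pt")
    # ~256 new tokens is on the order of a few seconds of audio for MusicGen
    audio = _model.generate(**inputs, max_new_tokens=256)
    sampling_rate = _model.config.audio_encoder.sampling_rate
    out_path = os.path.join(tempfile.mkdtemp(), "music.wav")
    scipy.io.wavfile.write(out_path, rate=sampling_rate, data=audio[0, 0].cpu().numpy())
    return out_path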