Spaces:
Sleeping
Sleeping
Upload 51 files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +7 -0
- src/__pycache__/utils.cpython-313.pyc +0 -0
- src/app.py +288 -0
- src/crud/__pycache__/vector_store.cpython-313.pyc +0 -0
- src/crud/vector_store.py +140 -0
- src/data/images/car_1.jpg +3 -0
- src/data/images/car_2.jpg +3 -0
- src/data/images/cat_1.jpg +0 -0
- src/data/images/cat_2.jpg +0 -0
- src/data/images/cat_3.jpg +0 -0
- src/data/images/motorcycle_1.jpg +0 -0
- src/data/images/motorcycle_2.jpg +3 -0
- src/data/images/motorcycle_3.jpg +3 -0
- src/embedding_creation.ipynb +3 -0
- src/mm_rag.ipynb +0 -0
- src/preprocess/__pycache__/embedding.cpython-313.pyc +0 -0
- src/preprocess/__pycache__/preprocessing.cpython-313.pyc +0 -0
- src/preprocess/embedding.py +69 -0
- src/preprocess/preprocessing.py +65 -0
- src/preprocessing_video.ipynb +0 -0
- src/shared_data/videos/video1/7Hcg-rLYwdM.en.vtt +85 -0
- src/shared_data/videos/video1/Welcome back to Planet Earth.mp4 +3 -0
- src/shared_data/videos/video1/audio.mp3 +3 -0
- src/shared_data/videos/video1/extracted_frame/frame_0.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_1.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_10.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_11.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_12.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_13.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_14.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_15.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_16.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_17.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_18.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_19.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_2.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_20.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_21.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_22.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_23.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_24.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_25.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_3.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_4.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_5.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_6.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_7.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_8.jpg +0 -0
- src/shared_data/videos/video1/extracted_frame/frame_9.jpg +0 -0
- src/shared_data/videos/video1/generated_captions.vtt +71 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
src/data/images/car_1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
src/data/images/car_2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
src/data/images/motorcycle_2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
src/data/images/motorcycle_3.jpg filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
src/embedding_creation.ipynb filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
src/shared_data/videos/video1/audio.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
src/shared_data/videos/video1/Welcome[[:space:]]back[[:space:]]to[[:space:]]Planet[[:space:]]Earth.mp4 filter=lfs diff=lfs merge=lfs -text
|
src/__pycache__/utils.cpython-313.pyc
ADDED
|
Binary file (13.8 kB). View file
|
|
|
src/app.py
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
import os
|
| 3 |
+
from os import path as osp
|
| 4 |
+
import gradio as gr
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from crud.vector_store import MultimodalLanceDB
|
| 7 |
+
from preprocess.embedding import BridgeTowerEmbeddings
|
| 8 |
+
from preprocess.preprocessing import extract_and_save_frames_and_metadata
|
| 9 |
+
#from utils import encode_image
|
| 10 |
+
from utils import (
|
| 11 |
+
download_video,
|
| 12 |
+
get_transcript_vtt,
|
| 13 |
+
download_youtube_subtitle,
|
| 14 |
+
get_video_id_from_url,
|
| 15 |
+
str2time,
|
| 16 |
+
maintain_aspect_ratio_resize,
|
| 17 |
+
getSubs,
|
| 18 |
+
encode_image,
|
| 19 |
+
)
|
| 20 |
+
from mistralai import Mistral
|
| 21 |
+
from langchain_core.runnables import (
|
| 22 |
+
RunnableParallel,
|
| 23 |
+
RunnablePassthrough,
|
| 24 |
+
RunnableLambda
|
| 25 |
+
)
|
| 26 |
+
from PIL import Image
|
| 27 |
+
|
| 28 |
+
import lancedb
|
| 29 |
+
|
| 30 |
+
# -------------------------------
|
| 31 |
+
# 1. Setup
|
| 32 |
+
# -------------------------------
|
| 33 |
+
load_dotenv()
|
| 34 |
+
if os.getenv("SPACE_ID"):
|
| 35 |
+
LANCEDB_HOST_FILE = "/tmp/.lancedb"
|
| 36 |
+
os.makedirs("/tmp", exist_ok=True)
|
| 37 |
+
else:
|
| 38 |
+
LANCEDB_HOST_FILE = "./shared_data/.lancedb"
|
| 39 |
+
TBL_NAME = "vectorstore"
|
| 40 |
+
|
| 41 |
+
db = lancedb.connect(LANCEDB_HOST_FILE)
|
| 42 |
+
embedder = BridgeTowerEmbeddings()
|
| 43 |
+
|
| 44 |
+
# -------------------------------
|
| 45 |
+
# 2. Preprocessing + Storage
|
| 46 |
+
# -------------------------------
|
| 47 |
+
def preprocess_and_store(youtube_url: str):
    """Download a YouTube video, extract frames + transcript metadata,
    embed the text/image pairs, and store them in LanceDB.

    Parameters
    ----------
    youtube_url : str
        URL of the YouTube video to ingest.

    Returns
    -------
    str
        Human-readable status message for the Gradio UI.
    """
    # HF Spaces only permits writing under /tmp.
    if os.getenv("SPACE_ID"):
        video_dir = "/tmp/videos/video1"
    else:
        video_dir = "./shared_data/videos/video1"

    extracted_frames_path = osp.join(video_dir, 'extracted_frame')

    # Bug fix: create the output folders *before* downloading. The original
    # called the download helpers first and only created the directories
    # afterwards, which works only if the helpers create them implicitly.
    Path(video_dir).mkdir(parents=True, exist_ok=True)
    Path(extracted_frames_path).mkdir(parents=True, exist_ok=True)

    # Download the YouTube video and its subtitle file into video_dir.
    video_filepath = download_video(youtube_url, video_dir)
    video_transcript_filepath = download_youtube_subtitle(youtube_url, video_dir)

    # Extract one frame per subtitle segment along with its metadata.
    metadatas = extract_and_save_frames_and_metadata(
        video_filepath,
        video_transcript_filepath,
        extracted_frames_path,
        video_dir,
    )

    # Collect transcripts and image paths.
    video_trans = [vid['transcript'] for vid in metadatas]
    video_img_path = [vid['extracted_frame_path'] for vid in metadatas]

    # Augment each segment's transcript with a window of neighbouring
    # segments so each stored text carries more context.
    # NOTE(review): the window is [i - n//2, i + n//2), i.e. asymmetric and of
    # size n-1; preserved as-is to keep embeddings identical to prior runs.
    n = 7
    half = n // 2
    updated_video_trans = [
        ' '.join(video_trans[i - half: i + half]) if i - half >= 0 else
        ' '.join(video_trans[0: i + half]) for i in range(len(video_trans))
    ]
    # Keep the stored metadata consistent with the augmented transcripts.
    for i, trans in enumerate(updated_video_trans):
        metadatas[i]['transcript'] = trans

    _ = MultimodalLanceDB.from_text_image_pairs(
        texts=updated_video_trans,
        image_paths=video_img_path,
        embedding=embedder,
        metadatas=metadatas,
        connection=db,
        table_name=TBL_NAME,
        mode="overwrite",
    )
    return f"✅ Video processed and stored: {youtube_url}"
|
| 98 |
+
|
| 99 |
+
# -------------------------------
|
| 100 |
+
# 3. Retrieval + Prompt Functions
|
| 101 |
+
# -------------------------------
|
| 102 |
+
vectorstore = MultimodalLanceDB(
|
| 103 |
+
uri=LANCEDB_HOST_FILE,
|
| 104 |
+
embedding=embedder,
|
| 105 |
+
table_name=TBL_NAME
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
retriever_module = vectorstore.as_retriever(
|
| 109 |
+
search_type="similarity",
|
| 110 |
+
search_kwargs={"k": 3}
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
def prompt_processing(input):
    """Build the LVLM prompt from the top retrieved document and the query.

    ``input`` is a dict with ``retrieved_results`` (list of documents whose
    ``.metadata`` carries ``transcript`` and ``extracted_frame_path``) and
    ``user_query``. Returns a dict with keys ``prompt`` and ``frame_path``.
    """
    query = input["user_query"]
    # Only the single best match is used downstream.
    top_doc = input["retrieved_results"][0]
    meta = top_doc.metadata

    prompt = (
        "The transcript associated with the image is '{transcript}'. "
        "{user_query}"
    ).format(transcript=meta["transcript"], user_query=query)

    return {"prompt": prompt, "frame_path": meta["extracted_frame_path"]}
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def lvlm_inference(input):
    """Run Pixtral on the prompt/frame pair produced by ``prompt_processing``.

    ``input`` is a dict with ``prompt`` (text) and ``frame_path`` (path to the
    retrieved frame). Returns ``(response_text, frame_path)``.
    """
    prompt_text = input['prompt']
    image_path = input['frame_path']

    # A fresh client per call; the key comes from the environment
    # (.env locally, a Space secret on HF).
    client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

    # Pixtral accepts images inline as base64 data URLs.
    encoded = encode_image(image_path)
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt_text},
            {"type": "image_url", "image_url": f"data:image/jpeg;base64,{encoded}"},
        ],
    }

    response = client.chat.complete(
        model="pixtral-12b-2409",
        messages=[user_message],
    )
    return response.choices[0].message.content, image_path
|
| 174 |
+
|
| 175 |
+
# LangChain Runnable chain
|
| 176 |
+
prompt_processing_module = RunnableLambda(prompt_processing)
|
| 177 |
+
lvlm_inference_module = RunnableLambda(lvlm_inference)
|
| 178 |
+
|
| 179 |
+
mm_rag_chain = (
|
| 180 |
+
RunnableParallel({"retrieved_results": retriever_module, "user_query": RunnablePassthrough()})
|
| 181 |
+
| prompt_processing_module
|
| 182 |
+
| lvlm_inference_module
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
# -------------------------------
|
| 186 |
+
# 4. Chat API for Gradio
|
| 187 |
+
# -------------------------------
|
| 188 |
+
video_loaded = False
|
| 189 |
+
|
| 190 |
+
def load_video(youtube_url):
    """Gradio handler: ingest a YouTube URL, then mark the video as loaded.

    The flag is flipped only after ``preprocess_and_store`` returns, so a
    failed ingest leaves the chat tab disabled.
    """
    global video_loaded
    result = preprocess_and_store(youtube_url)
    video_loaded = True
    return result
|
| 195 |
+
|
| 196 |
+
def chat_interface(message, history):
    """Gradio chat callback: answer ``message`` via the multimodal RAG chain.

    Returns ``(cleared_textbox, updated_history, retrieved_frame_or_None)``.
    Acts as a no-op until a video has been loaded.
    """
    if not video_loaded:
        return "", history, None

    answer, frame_path = mm_rag_chain.invoke(message)
    history.append((message, answer))

    # Best-effort: show the retrieved frame; the chat still works if loading fails.
    try:
        frame_image = Image.open(frame_path)
    except Exception as e:
        print(f"Error loading image: {e}")
        frame_image = None

    return "", history, frame_image
|
| 211 |
+
|
| 212 |
+
# -------------------------------
|
| 213 |
+
# 5. Enhanced Gradio Interface
|
| 214 |
+
# -------------------------------
|
| 215 |
+
with gr.Blocks(title="Multimodal RAG Video Chat") as demo:
|
| 216 |
+
gr.Markdown("# 🎬 Multimodal RAG Video Chat\nChat with YouTube clips using BridgeTower + LanceDB + Pixtral!")
|
| 217 |
+
|
| 218 |
+
with gr.Tab("1. Load Video"):
|
| 219 |
+
youtube_url = gr.Textbox(
|
| 220 |
+
label="YouTube URL",
|
| 221 |
+
placeholder="Paste a YouTube link here...",
|
| 222 |
+
lines=1
|
| 223 |
+
)
|
| 224 |
+
load_btn = gr.Button("Process Video", variant="primary")
|
| 225 |
+
status = gr.Textbox(label="Status", interactive=False)
|
| 226 |
+
load_btn.click(load_video, inputs=youtube_url, outputs=status)
|
| 227 |
+
|
| 228 |
+
with gr.Tab("2. Chat with Video"):
|
| 229 |
+
with gr.Row():
|
| 230 |
+
with gr.Column(scale=2):
|
| 231 |
+
chatbot = gr.Chatbot(
|
| 232 |
+
label="Chat about the video",
|
| 233 |
+
height=500,
|
| 234 |
+
show_label=True
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
with gr.Column(scale=1):
|
| 238 |
+
retrieved_image = gr.Image(
|
| 239 |
+
label="Retrieved Frame",
|
| 240 |
+
height=400,
|
| 241 |
+
show_label=True,
|
| 242 |
+
interactive=False
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
with gr.Row():
|
| 246 |
+
msg = gr.Textbox(
|
| 247 |
+
label="Your question",
|
| 248 |
+
placeholder="Ask something about the video...",
|
| 249 |
+
lines=2,
|
| 250 |
+
scale=4
|
| 251 |
+
)
|
| 252 |
+
send_btn = gr.Button("Send", variant="primary", scale=1)
|
| 253 |
+
|
| 254 |
+
# Clear message after sending
|
| 255 |
+
msg.submit(chat_interface, inputs=[msg, chatbot], outputs=[msg, chatbot, retrieved_image])
|
| 256 |
+
send_btn.click(chat_interface, inputs=[msg, chatbot], outputs=[msg, chatbot, retrieved_image])
|
| 257 |
+
|
| 258 |
+
# Add some usage instructions
|
| 259 |
+
with gr.Tab("📖 Instructions"):
|
| 260 |
+
gr.Markdown("""
|
| 261 |
+
## How to use this Multimodal RAG system:
|
| 262 |
+
|
| 263 |
+
1. **Load Video**:
|
| 264 |
+
- Go to the "Load Video" tab
|
| 265 |
+
- Paste a YouTube URL
|
| 266 |
+
- Click "Process Video" and wait for processing to complete
|
| 267 |
+
|
| 268 |
+
2. **Chat with Video**:
|
| 269 |
+
- Go to the "Chat with Video" tab
|
| 270 |
+
- Ask questions about the video content
|
| 271 |
+
- The system will retrieve the most relevant frame and provide answers
|
| 272 |
+
- The retrieved frame will be displayed on the right side
|
| 273 |
+
|
| 274 |
+
## Features:
|
| 275 |
+
- 🎥 Processes YouTube videos automatically
|
| 276 |
+
- 🧠 Uses BridgeTower for multimodal embeddings
|
| 277 |
+
- 💾 Stores data in LanceDB vector database
|
| 278 |
+
- 🤖 Powered by Pixtral vision-language model
|
| 279 |
+
- 🖼️ Shows relevant video frames alongside responses
|
| 280 |
+
""")
|
| 281 |
+
|
| 282 |
+
if __name__ == "__main__":
|
| 283 |
+
print('App starting...')
|
| 284 |
+
# For HF Spaces, use default host and port
|
| 285 |
+
if os.getenv("SPACE_ID"):
|
| 286 |
+
demo.launch()
|
| 287 |
+
else:
|
| 288 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|
src/crud/__pycache__/vector_store.cpython-313.pyc
ADDED
|
Binary file (6.15 kB). View file
|
|
|
src/crud/vector_store.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Iterable, List, Optional
|
| 2 |
+
from langchain_core.embeddings import Embeddings
|
| 3 |
+
import uuid
|
| 4 |
+
from langchain_community.vectorstores.lancedb import LanceDB
|
| 5 |
+
|
| 6 |
+
class MultimodalLanceDB(LanceDB):
    """`LanceDB` vector store to process multimodal (text + image) data.

    Parameters:
    -----------
    connection: Any
        LanceDB connection to use. If not provided, a new connection will be created.
    embedding: Embeddings
        Embedding to use for the vectorstore; must provide ``embed_image_text_pairs``.
    vector_key: str
        Key to use for the vector in the database. Defaults to ``vector``.
    id_key: str
        Key to use for the id in the database. Defaults to ``id``.
    text_key: str
        Key to use for the text in the database. Defaults to ``text``.
    image_path_key: str
        Key to use for the path to image in the database. Defaults to ``image_path``.
    table_name: str
        Name of the table to use. Defaults to ``vectorstore``.
    api_key: str
        API key to use for LanceDB cloud database.
    region: str
        Region to use for LanceDB cloud database.
    mode: str
        Mode to use for adding data to the table. Defaults to ``append``.
    """

    def __init__(
        self,
        connection: Optional[Any] = None,
        embedding: Optional[Embeddings] = None,
        uri: Optional[str] = "/tmp/lancedb",
        vector_key: Optional[str] = "vector",
        id_key: Optional[str] = "id",
        text_key: Optional[str] = "text",
        image_path_key: Optional[str] = "image_path",
        table_name: Optional[str] = "vectorstore",
        api_key: Optional[str] = None,
        region: Optional[str] = None,
        mode: Optional[str] = "append",
    ):
        # Keyword arguments instead of the original positional call so a
        # reordering of the parent signature cannot silently mis-assign them.
        super().__init__(
            connection=connection,
            embedding=embedding,
            uri=uri,
            vector_key=vector_key,
            id_key=id_key,
            text_key=text_key,
            table_name=table_name,
            api_key=api_key,
            region=region,
            mode=mode,
        )
        self._image_path_key = image_path_key

    def add_text_image_pairs(
        self,
        texts: Iterable[str],
        image_paths: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Turn text-image pairs into embeddings and add them to the database.

        Parameters:
        ----------
        texts: Iterable[str]
            Iterable of strings to combine with corresponding images to add to the vectorstore.
        image_paths: Iterable[str]
            Iterable of path-to-images as strings to combine with corresponding texts.
        metadatas: List[dict]
            Optional list of metadatas associated with the texts.
        ids: List[str]
            Optional list of ids to associate with the texts.

        Returns:
        --------
        List of ids of the added text-image pairs.
        """
        # Materialize once so generators are supported and lengths can be checked.
        texts = list(texts)
        image_paths = list(image_paths)
        # the length of texts must be equal to the length of images
        assert len(texts) == len(image_paths), "the len of transcripts should be equal to the len of images"

        print(f'The length of texts is {len(texts)}')

        ids = ids or [str(uuid.uuid4()) for _ in texts]
        embeddings = self._embedding.embed_image_text_pairs(texts=texts, images=image_paths)  # type: ignore

        docs = []
        for idx, text in enumerate(texts):
            metadata = metadatas[idx] if metadatas else {"id": ids[idx]}
            docs.append(
                {
                    self._vector_key: embeddings[idx],
                    self._id_key: ids[idx],
                    self._text_key: text,
                    self._image_path_key: image_paths[idx],
                    "metadata": metadata,
                }
            )
        print(f'Adding {len(docs)} text-image pairs to the vectorstore...')

        # Per-call mode overrides the instance-level default.
        mode = kwargs.get('mode', self.mode)
        if self._table_name in self._connection.table_names():
            tbl = self._connection.open_table(self._table_name)
            # Bug fix: the original computed ``mode`` but never used it (and had
            # an api_key if/else whose branches were byte-identical), so
            # mode="overwrite" silently appended. Forward mode to LanceDB.
            tbl.add(docs, mode=mode)
        else:
            self._connection.create_table(self._table_name, data=docs)
        return ids

    @classmethod
    def from_text_image_pairs(
        cls,
        texts: List[str],
        image_paths: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        connection: Any = None,
        vector_key: Optional[str] = "vector",
        id_key: Optional[str] = "id",
        text_key: Optional[str] = "text",
        image_path_key: Optional[str] = "image_path",
        table_name: Optional[str] = "vectorstore",
        **kwargs: Any,
    ):
        """Alternate constructor: build a store and ingest the pairs in one step.

        ``**kwargs`` (e.g. ``mode="overwrite"``) are forwarded to
        :meth:`add_text_image_pairs`. Returns the populated instance.
        """
        # Use cls so subclasses get instances of their own type.
        instance = cls(
            connection=connection,
            embedding=embedding,
            vector_key=vector_key,
            id_key=id_key,
            text_key=text_key,
            image_path_key=image_path_key,
            table_name=table_name,
        )
        instance.add_text_image_pairs(texts, image_paths, metadatas=metadatas, **kwargs)

        return instance
|
src/data/images/car_1.jpg
ADDED
|
Git LFS Details
|
src/data/images/car_2.jpg
ADDED
|
Git LFS Details
|
src/data/images/cat_1.jpg
ADDED
|
src/data/images/cat_2.jpg
ADDED
|
src/data/images/cat_3.jpg
ADDED
|
src/data/images/motorcycle_1.jpg
ADDED
|
src/data/images/motorcycle_2.jpg
ADDED
|
Git LFS Details
|
src/data/images/motorcycle_3.jpg
ADDED
|
Git LFS Details
|
src/embedding_creation.ipynb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8d2b46e0c0b041904c02be7a0878a8b6b59e0ee98fff649bd8a7b38134c2dc6
|
| 3 |
+
size 47954568
|
src/mm_rag.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/preprocess/__pycache__/embedding.cpython-313.pyc
ADDED
|
Binary file (2.9 kB). View file
|
|
|
src/preprocess/__pycache__/preprocessing.cpython-313.pyc
ADDED
|
Binary file (2.19 kB). View file
|
|
|
src/preprocess/embedding.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from utils import encode_image
|
| 2 |
+
from utils import bt_embeddings
|
| 3 |
+
from tqdm import tqdm
|
| 4 |
+
from typing import List
|
| 5 |
+
from langchain_core.embeddings import Embeddings
|
| 6 |
+
from langchain_core.pydantic_v1 import BaseModel
|
| 7 |
+
|
| 8 |
+
class BridgeTowerEmbeddings(BaseModel, Embeddings):
    """BridgeTower embedding model wrapping the ``bt_embeddings`` helper."""

    def embed_image_text_pairs(self, texts: List[str], images: List[str], batch_size=2) -> List[List[float]]:
        """Embed a list of image-text pairs using BridgeTower.

        Parameters:
        -----------
        texts: str
            The list of texts to embed.
        images: List
            The list of path-to-images to embed
        batch_size: int
            The batch size to process, default to 2

        Returns:
        --------
        List of embeddings, one for each image-text pairs.
        """
        # the length of texts must be equal to the length of images
        assert len(texts) == len(images), "the len of captions should be equal to the len of images"

        print(f"Embedding {len(texts)} image-text pairs...")

        # NOTE: pairs are processed one at a time; batch_size is currently unused.
        return [
            bt_embeddings(caption, encode_image(img_path))
            for img_path, caption in tqdm(
                zip(images, texts), total=len(images), desc="Processing pairs"
            )
        ]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents using BridgeTower.

        Each text is paired with an empty image so the text-only path of the
        multimodal model is used.

        Parameters:
        -----------
        texts: str
            The list of texts to embed.

        Returns:
        --------
        List of embeddings, one for each text.
        """
        return [bt_embeddings(doc, "") for doc in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string using BridgeTower.

        Parameters:
        -----------
        text: str
            The text to embed.

        Returns:
        --------
        Embedding for the text.
        """
        return self.embed_documents([text])[0]
|
src/preprocess/preprocessing.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from os import path as osp
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
import cv2
|
| 5 |
+
import webvtt
|
| 6 |
+
|
| 7 |
+
from utils import maintain_aspect_ratio_resize, str2time
|
| 8 |
+
|
| 9 |
+
def extract_and_save_frames_and_metadata(
        path_to_video,
        path_to_transcript,
        path_to_save_extracted_frames,
        path_to_save_metadatas):
    """For each subtitle segment, grab the mid-segment video frame, save it as
    a JPEG, and record its metadata; all metadata is dumped to metadatas.json.

    Returns the list of metadata dicts (one per successfully extracted frame).
    """
    # Accumulates one metadata dict per extracted frame.
    metadatas = []

    video = cv2.VideoCapture(path_to_video)
    captions = webvtt.read(path_to_transcript)

    # One pass over the transcript: each caption defines a video segment.
    for idx, caption in enumerate(captions):
        # Midpoint (in ms) of the caption's time span.
        start_ms = str2time(caption.start)
        end_ms = str2time(caption.end)
        mid_time_ms = (start_ms + end_ms) / 2

        # Collapse the caption onto a single line.
        text = caption.text.replace("\n", ' ')

        # Seek to the midpoint and grab a single frame.
        video.set(cv2.CAP_PROP_POS_MSEC, mid_time_ms)
        success, frame = video.read()
        if not success:
            print(f"ERROR! Cannot extract frame: idx = {idx}")
            continue

        # Resize (keeping aspect ratio) and write out as JPEG.
        resized = maintain_aspect_ratio_resize(frame, height=350)
        img_fpath = osp.join(path_to_save_extracted_frames, f'frame_{idx}.jpg')
        cv2.imwrite(img_fpath, resized)

        metadatas.append({
            'extracted_frame_path': img_fpath,
            'transcript': text,
            'video_segment_id': idx,
            'video_path': path_to_video,
            'mid_time_ms': mid_time_ms,
        })

    # Persist metadata of all extracted frames.
    with open(osp.join(path_to_save_metadatas, 'metadatas.json'), 'w') as outfile:
        json.dump(metadatas, outfile)
    return metadatas
|
src/preprocessing_video.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/shared_data/videos/video1/7Hcg-rLYwdM.en.vtt
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
WEBVTT
|
| 2 |
+
Kind: captions
|
| 3 |
+
Language: en
|
| 4 |
+
|
| 5 |
+
00:00:03.620 --> 00:00:06.879
|
| 6 |
+
As I look back on the the mission that we've had here
|
| 7 |
+
|
| 8 |
+
00:00:06.879 --> 00:00:10.559
|
| 9 |
+
on the International Space Station,
|
| 10 |
+
I'm proud to have been a part of much of
|
| 11 |
+
|
| 12 |
+
00:00:10.559 --> 00:00:13.679
|
| 13 |
+
the science activities that happened over the last
|
| 14 |
+
|
| 15 |
+
00:00:13.680 --> 00:00:14.420
|
| 16 |
+
two months.
|
| 17 |
+
|
| 18 |
+
00:00:14.420 --> 00:00:15.780
|
| 19 |
+
The view is always amazing
|
| 20 |
+
|
| 21 |
+
00:00:15.780 --> 00:00:17.520
|
| 22 |
+
I didn't think I would do another
|
| 23 |
+
|
| 24 |
+
00:00:17.520 --> 00:00:20.720
|
| 25 |
+
spacewalk and to now have the chance to have done
|
| 26 |
+
|
| 27 |
+
00:00:20.720 --> 00:00:23.840
|
| 28 |
+
four more was just icing on the cake for a
|
| 29 |
+
|
| 30 |
+
00:00:23.840 --> 00:00:24.900
|
| 31 |
+
a wonderful mission.
|
| 32 |
+
|
| 33 |
+
00:00:24.900 --> 00:00:26.900
|
| 34 |
+
Does the 10th one feel like the first one?
|
| 35 |
+
|
| 36 |
+
00:00:26.960 --> 00:00:30.160
|
| 37 |
+
No, a little more comfortable on the tenth one.
|
| 38 |
+
|
| 39 |
+
00:00:30.160 --> 00:00:33.300
|
| 40 |
+
It's hard to put into words
|
| 41 |
+
|
| 42 |
+
00:00:33.420 --> 00:00:38.480
|
| 43 |
+
just what it was like to be a part of
|
| 44 |
+
this expedition, expedition 63. It'll be
|
| 45 |
+
|
| 46 |
+
00:00:38.480 --> 00:00:40.399
|
| 47 |
+
kind of a memory that will last a
|
| 48 |
+
|
| 49 |
+
00:00:40.400 --> 00:00:43.260
|
| 50 |
+
lifetime for me. It's been a true honor.
|
| 51 |
+
|
| 52 |
+
00:00:43.260 --> 00:00:44.800
|
| 53 |
+
Dragon SpaceX
|
| 54 |
+
|
| 55 |
+
00:00:44.800 --> 00:00:48.160
|
| 56 |
+
undock sequence commanded. Thrusters
|
| 57 |
+
looking good.
|
| 58 |
+
|
| 59 |
+
00:00:48.160 --> 00:00:50.440
|
| 60 |
+
The hardest part was getting us launched,
|
| 61 |
+
|
| 62 |
+
00:00:50.440 --> 00:00:53.080
|
| 63 |
+
but the most important part is bringing us home.
|
| 64 |
+
|
| 65 |
+
00:00:56.040 --> 00:00:59.180
|
| 66 |
+
Rise and shine Daddy. We love you.
|
| 67 |
+
|
| 68 |
+
00:00:59.540 --> 00:01:03.160
|
| 69 |
+
Hurry home so we can go get my dog.
|
| 70 |
+
|
| 71 |
+
00:01:06.040 --> 00:01:07.920
|
| 72 |
+
Splashdown!
|
| 73 |
+
|
| 74 |
+
00:01:07.920 --> 00:01:11.200
|
| 75 |
+
Welcome back to planet Earth and thanks for flying SpaceX.
|
| 76 |
+
|
| 77 |
+
00:01:11.200 --> 00:01:12.940
|
| 78 |
+
It's truly our honor and privilege.
|
| 79 |
+
|
| 80 |
+
00:01:12.940 --> 00:01:14.800
|
| 81 |
+
Space Dads are back on Earth
|
| 82 |
+
|
| 83 |
+
00:01:14.800 --> 00:01:19.140
|
| 84 |
+
after a 19-hour return journey from space.
|
| 85 |
+
|
src/shared_data/videos/video1/Welcome back to Planet Earth.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d90d6a55ba3a7c2c15ed78977df2721f67c0f907957d50688d8b695cf662c500
|
| 3 |
+
size 4578531
|
src/shared_data/videos/video1/audio.mp3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d757fa88232111a0f1ed24ae0e23a4143479391ff7a71e7255bdc52283496d6
|
| 3 |
+
size 1434687
|
src/shared_data/videos/video1/extracted_frame/frame_0.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_1.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_10.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_11.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_12.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_13.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_14.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_15.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_16.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_17.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_18.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_19.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_2.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_20.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_21.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_22.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_23.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_24.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_25.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_3.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_4.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_5.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_6.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_7.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_8.jpg
ADDED
|
src/shared_data/videos/video1/extracted_frame/frame_9.jpg
ADDED
|
src/shared_data/videos/video1/generated_captions.vtt
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
WEBVTT
|
| 2 |
+
|
| 3 |
+
00:00.000 --> 00:08.780
|
| 4 |
+
As I look back on the mission that we've had here on the International Space Station,
|
| 5 |
+
|
| 6 |
+
00:08.780 --> 00:13.300
|
| 7 |
+
I'm proud to have been a part of much of the science activities that happened over the
|
| 8 |
+
|
| 9 |
+
00:13.300 --> 00:14.300
|
| 10 |
+
last two months.
|
| 11 |
+
|
| 12 |
+
00:14.300 --> 00:16.180
|
| 13 |
+
The view is always amazing though.
|
| 14 |
+
|
| 15 |
+
00:16.180 --> 00:21.260
|
| 16 |
+
I didn't think I would do another spacewalk and to now have the chance to have done four
|
| 17 |
+
|
| 18 |
+
00:21.260 --> 00:24.980
|
| 19 |
+
more was just icing on the cake for a wonderful mission.
|
| 20 |
+
|
| 21 |
+
00:25.480 --> 00:26.980
|
| 22 |
+
The tenth one, do you like the first one?
|
| 23 |
+
|
| 24 |
+
00:26.980 --> 00:27.980
|
| 25 |
+
No.
|
| 26 |
+
|
| 27 |
+
00:27.980 --> 00:30.280
|
| 28 |
+
A little more comfortable on your tenth one.
|
| 29 |
+
|
| 30 |
+
00:30.280 --> 00:36.980
|
| 31 |
+
It's hard to put into words just what it was like to be a part of this expedition, the
|
| 32 |
+
|
| 33 |
+
00:36.980 --> 00:37.980
|
| 34 |
+
Expedition 63.
|
| 35 |
+
|
| 36 |
+
00:37.980 --> 00:42.280
|
| 37 |
+
It'll be kind of a memory that will last a lifetime for me.
|
| 38 |
+
|
| 39 |
+
00:42.280 --> 00:43.780
|
| 40 |
+
It's been a true honor.
|
| 41 |
+
|
| 42 |
+
00:43.780 --> 00:46.780
|
| 43 |
+
Dragon SpaceX, Undock sequence commanded.
|
| 44 |
+
|
| 45 |
+
00:46.780 --> 00:48.340
|
| 46 |
+
The roster's looking good.
|
| 47 |
+
|
| 48 |
+
00:48.340 --> 00:52.900
|
| 49 |
+
The hardest part was getting us launched, but the most important part is bringing us home.
|
| 50 |
+
|
| 51 |
+
00:55.980 --> 00:58.980
|
| 52 |
+
I've been telling Daddy we love you.
|
| 53 |
+
|
| 54 |
+
00:58.980 --> 01:02.980
|
| 55 |
+
Hurry home so we can go get my dog.
|
| 56 |
+
|
| 57 |
+
01:05.980 --> 01:07.980
|
| 58 |
+
Flashdown.
|
| 59 |
+
|
| 60 |
+
01:07.980 --> 01:10.980
|
| 61 |
+
Welcome back to Planet Earth and thanks for flying SpaceX.
|
| 62 |
+
|
| 63 |
+
01:10.980 --> 01:12.980
|
| 64 |
+
It was truly our honor and privilege.
|
| 65 |
+
|
| 66 |
+
01:12.980 --> 01:17.980
|
| 67 |
+
Space dads are back on Earth after a 19 hour return journey from space.
|
| 68 |
+
|
| 69 |
+
01:24.980 --> 01:27.980
|
| 70 |
+
You
|
| 71 |
+
|