| #!/usr/bin/env python3 | |
| """ | |
| Mobile VLA ์ฌ์ฉ ์์ | |
| """ | |
| import torch | |
| from transformers import AutoTokenizer, AutoProcessor | |
| from PIL import Image | |
| import numpy as np | |
def load_mobile_vla_model(model_name="minuum/mobile-vla"):
    """Load the Mobile VLA model identified by *model_name*.

    Currently a placeholder: it only announces which checkpoint would be
    loaded and returns ``None``.  A real implementation would delegate to
    ``MobileVLATrainer``::

        from robovlms.train.mobile_vla_trainer import MobileVLATrainer
        model = MobileVLATrainer.from_pretrained(model_name)

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        The loaded model, or ``None`` while this remains a stub.
    """
    print(f"Loading Mobile VLA model: {model_name}")
    return None
def predict_action(model, image_path, task_description):
    """Predict a mobile-base action for the given image and task.

    Placeholder implementation: the image is opened and converted to RGB
    (which at least validates the path), but the returned action is a
    fixed dummy value.  A real implementation would preprocess the frame
    (``mobile_vla_collate_fn``) and run the model.

    Args:
        model: Loaded Mobile VLA model (unused while this is a stub).
        image_path: Path to the input image file.
        task_description: Natural-language task instruction (unused while
            this is a stub).

    Returns:
        list[float]: Dummy ``[linear_x, linear_y, angular_z]`` command.
    """
    frame = Image.open(image_path).convert("RGB")  # validates path / format only
    return [0.5, 0.2, 0.1]
def main():
    """Run the end-to-end Mobile VLA example: load model, predict, report."""
    print("🚀 Mobile VLA 예제 실행")

    # Load the (placeholder) model.
    vla_model = load_mobile_vla_model()

    # Run one example prediction on a sample frame.
    instruction = "Navigate around obstacles to track the target cup"
    predicted = predict_action(vla_model, "example_image.jpg", instruction)

    print(f"Task: {instruction}")
    print(f"Predicted Action: {predicted}")
    # Break out each component of the [linear_x, linear_y, angular_z] command.
    print(f" - Linear X (forward/backward): {predicted[0]:.3f}")
    print(f" - Linear Y (left/right): {predicted[1]:.3f}")
    print(f" - Angular Z (rotation): {predicted[2]:.3f}")


if __name__ == "__main__":
    main()