Spaces:
Runtime error
Runtime error
Create device.py
Browse files
device.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from base64 import b64encode
|
| 2 |
+
import numpy
|
| 3 |
+
import torch
|
| 4 |
+
from diffusers import AutoencoderKL, LMSDiscreteScheduler, UNet2DConditionModel
|
| 5 |
+
from huggingface_hub import notebook_login
|
| 6 |
+
|
| 7 |
+
# For video display:
|
| 8 |
+
from IPython.display import HTML
|
| 9 |
+
from matplotlib import pyplot as plt
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from PIL import Image
|
| 12 |
+
from torch import autocast
|
| 13 |
+
from torchvision import transforms as tfms
|
| 14 |
+
from tqdm.auto import tqdm
|
| 15 |
+
from transformers import CLIPTextModel, CLIPTokenizer, logging
|
| 16 |
+
import os
|
| 17 |
+
# Pick the compute backend in priority order: CUDA GPU, then Apple MPS,
# then plain CPU.
# Older PyTorch builds do not ship `torch.backends.mps`, so look the backend
# up defensively instead of letting an AttributeError abort the import.
_mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    torch_device = "cuda"
elif _mps_backend is not None and _mps_backend.is_available():
    torch_device = "mps"
else:
    torch_device = "cpu"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Hub repositories the individual pipeline components are pulled from.
_SD_REPO = "CompVis/stable-diffusion-v1-4"
_CLIP_REPO = "openai/clip-vit-large-patch14"

# Autoencoder: used to decode the latents back into image space.
vae = AutoencoderKL.from_pretrained(_SD_REPO, subfolder="vae")

# Text side: tokenizer and text encoder used to tokenize and encode the prompt.
tokenizer = CLIPTokenizer.from_pretrained(_CLIP_REPO)
text_encoder = CLIPTextModel.from_pretrained(_CLIP_REPO)

# UNet: the model that generates the latents.
unet = UNet2DConditionModel.from_pretrained(_SD_REPO, subfolder="unet")

# Noise scheduler, configured explicitly rather than loaded from the hub.
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
)
|
| 33 |
+
|
| 34 |
+
# Move every model onto the device selected in `torch_device`
# (a GPU when one is available, otherwise whatever fallback was chosen).
vae = vae.to(torch_device)
text_encoder = text_encoder.to(torch_device)
unet = unet.to(torch_device)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# Fetch the text encoder's embedding sub-module once, then expose its token
# and position lookup layers as module-level handles.
_clip_embeddings = text_encoder.text_model.embeddings
token_emb_layer = _clip_embeddings.token_embedding
pos_emb_layer = _clip_embeddings.position_embedding

# Position ids limited to the first 77 entries, and their embeddings.
# NOTE(review): 77 presumably matches the tokenizer's maximum sequence
# length — confirm against `tokenizer.model_max_length`.
position_ids = _clip_embeddings.position_ids[:, :77]
position_embeddings = pos_emb_layer(position_ids)
|