consult / test.py
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Load the tokenizer and the 4-bit quantized Llama 3.2 1B base model.
tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3.2-1b-bnb-4bit")
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/llama-3.2-1b-bnb-4bit",
    device_map="auto",
    torch_dtype=torch.float16,
)

# Attach the LoRA adapter and merge it into the base weights.
# Note: merging into a bitsandbytes 4-bit base requires dequantization and may be lossy.
model = PeftModel.from_pretrained(base_model, "MeWan2808/SIT_legalTech_llama3.2")
model = model.merge_and_unload()

# device_map="auto" already placed the model, so don't call model.to(device)
# (moving a bitsandbytes-quantized model with .to() is not supported);
# keep a device handle only for moving inputs.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.eval()
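
# --- Example usage (a minimal sketch: the prompt below is hypothetical and the
# generation settings are illustrative, not part of the original script) ---
prompt = "Summarise the key steps for filing a consumer complaint."
inputs = tokenizer(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=256, do_sample=False)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))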