# Igor Evdokimov
# - basics
# d632b3b
from huggingface_hub import login
from datasets import load_dataset
def prepare(example):
    """Map one raw record to the ``input_text`` / ``target_text`` pair.

    Args:
        example: Mapping for a single record. The ``"prompt"`` and
            ``"completion"`` keys are read; both are optional.

    Returns:
        A dict with keys ``"input_text"`` and ``"target_text"``. A field
        that is missing or explicitly ``None`` (e.g. a JSON null) becomes
        the empty string; any other value is passed through unchanged.
    """
    # Replace these with the actual field names of your JSONL records.
    inp = example.get("prompt")
    tgt = example.get("completion")
    # ``dict.get(key, "")`` only covers a missing key; a key that is present
    # with a None value would otherwise leak None into the output columns.
    return {
        "input_text": "" if inp is None else inp,
        "target_text": "" if tgt is None else tgt,
    }
# Load the raw records from the local JSON file as a single "train" split.
ds = load_dataset("json", data_files="trainingSet.json", split="train")

# Add the "input_text" / "target_text" columns produced by prepare().
ds = ds.map(prepare)

# Hold out 2% of the rows as a test split. A fixed seed keeps the split
# identical across runs, so the saved dataset is reproducible.
ds = ds.train_test_split(test_size=0.02, seed=42)

# Persist both splits to the "processed_ds" directory.
ds.save_to_disk("processed_ds")
print("Saved processed_ds")