| from huggingface_hub import login | |
| from datasets import load_dataset | |
def prepare(example):
    """Map one raw record to the ``input_text`` / ``target_text`` schema.

    Args:
        example: Mapping for a single record. The ``"prompt"`` and
            ``"completion"`` keys are read; substitute the actual field
            names of your JSONL file.

    Returns:
        A dict with the keys ``"input_text"`` and ``"target_text"``. A field
        that is missing or ``None`` is returned as an empty string.
    """
    # dict.get's default only applies when the key is absent; a key that is
    # present but holds None would otherwise leak None into the output.
    prompt = example.get("prompt")
    completion = example.get("completion")
    return {
        "input_text": "" if prompt is None else prompt,
        "target_text": "" if completion is None else completion,
    }
# Build the processed dataset in one pipeline: load the "train" split from
# the JSON data file, run `prepare` over it, then split off a test portion
# (test_size=0.02).
ds = (
    load_dataset("json", data_files="trainingSet.json", split="train")
    .map(prepare)
    .train_test_split(test_size=0.02)
)

# Persist the result under "processed_ds" and report completion.
ds.save_to_disk("processed_ds")
print("Saved processed_ds")