import os

from datasets import load_dataset

# Directory handed to `load_dataset` as its `cache_dir`.
CACHE_DIR = 'cache'
# Upper bound on the number of records returned by `load_data`.
N_SAMPLES = 15


def load_data():
    """Load the first ``N_SAMPLES`` rows of the commit-rewriting dataset.

    Fetches the ``train`` split of ``petrtsv-jb/commit-rewriting-samples``
    via ``datasets.load_dataset``, converts it to a pandas DataFrame and
    returns the leading rows as plain dictionaries.

    The access token is read from the ``HF_REWRITING_TOKEN`` environment
    variable; when the variable is unset, ``None`` is passed as ``token``.
    NOTE(review): whether the dataset can be fetched without a token is not
    visible from this file -- confirm against the dataset's access settings.

    Returns:
        A list with one ``dict`` per row (column name -> value), containing
        at most ``N_SAMPLES`` entries, in dataset order.
    """
    dataset = load_dataset(
        "petrtsv-jb/commit-rewriting-samples",
        split="train",
        token=os.environ.get('HF_REWRITING_TOKEN'),
        cache_dir=CACHE_DIR,
    )
    df = dataset.to_pandas()
    # Truncate before building the records so only the rows that are
    # actually returned get converted to dicts; the result is the same as
    # slicing the full record list.
    return df.head(N_SAMPLES).to_dict('records')