import os

from datasets import load_dataset
# Directory handed to `load_dataset` as its `cache_dir`.
CACHE_DIR = 'cache'
# Upper bound on the number of records returned by `load_data`.
N_SAMPLES = 15
def load_data() -> list:
    """Return the first ``N_SAMPLES`` rows of the dataset as a list of dicts.

    Loads the ``train`` split of ``petrtsv-jb/commit-rewriting-samples`` via
    ``load_dataset``, using ``CACHE_DIR`` as the cache directory, and converts
    it to a pandas DataFrame. The access token is read from the
    ``HF_REWRITING_TOKEN`` environment variable; ``None`` is passed when the
    variable is unset.

    Returns:
        A list with at most ``N_SAMPLES`` dicts, one per row, keyed by column
        name.
    """
    df = load_dataset(
        "petrtsv-jb/commit-rewriting-samples",
        split="train",
        token=os.environ.get('HF_REWRITING_TOKEN'),
        cache_dir=CACHE_DIR,
    ).to_pandas()
    # Slice the frame before converting so only the rows that are actually
    # returned get materialized as dicts.
    return df.iloc[:N_SAMPLES].to_dict('records')