commit-message-editing / data_loader.py
Petr Tsvetkov
Switch to the special commit rewriting dataset
928b43c
raw
history blame
365 Bytes
import os
from datasets import load_dataset
# Directory handed to `load_dataset` as `cache_dir`; a relative path, so it
# resolves against the process's current working directory.
CACHE_DIR = 'cache'
# Upper bound on the number of records `load_data` returns.
N_SAMPLES = 15
def load_data():
    """Load the commit-rewriting samples and return the first rows as dicts.

    Fetches the ``train`` split of ``petrtsv-jb/commit-rewriting-samples``
    through ``load_dataset``, using ``CACHE_DIR`` as the cache directory and
    the value of the ``HF_REWRITING_TOKEN`` environment variable as the
    access token (``None`` is passed when the variable is unset).

    Returns:
        A list with at most ``N_SAMPLES`` entries, one dict per dataset row
        mapping column name to value, in dataset order.
    """
    dataset = load_dataset(
        "petrtsv-jb/commit-rewriting-samples",
        split="train",
        token=os.environ.get('HF_REWRITING_TOKEN'),
        cache_dir=CACHE_DIR,
    )
    # Truncate *before* building the records so only the rows that are
    # actually returned get converted into Python dicts.
    return dataset.to_pandas().head(N_SAMPLES).to_dict('records')