Hamses committed on
Commit
75705c8
1 Parent(s): 71a06c1

Delete preprocess Dataset

Browse files
Files changed (1) hide show
  1. preprocess Dataset +0 -15
preprocess Dataset DELETED
@@ -1,15 +0,0 @@
1
- from datasets import load_dataset
2
-
3
- # Load your custom dataset (ensure it's in the proper format)
4
- dataset = load_dataset('EU_Regulation_261_2004', data_files={'train': 'train.txt', 'test': 'test.txt'})
5
-
6
- # Load the GPT-2 tokenizer
7
- from transformers import GPT2Tokenizer
8
-
9
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
10
-
11
- # Preprocess the dataset
12
- def preprocess_function(examples):
13
- return tokenizer(examples['EU_Regulation_261_2004'], padding='max_length', truncation=True)
14
-
15
- encoded_dataset = dataset.map(preprocess_function, batched=True)