Delete preprocess Dataset
preprocess Dataset  DELETED  +0 -15
@@ -1,15 +0,0 @@
-from datasets import load_dataset
-
-# Load your custom dataset (ensure it's in the proper format)
-dataset = load_dataset('EU_Regulation_261_2004', data_files={'train': 'train.txt', 'test': 'test.txt'})
-
-# Load the GPT-2 tokenizer
-from transformers import GPT2Tokenizer
-
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-
-# Preprocess the dataset
-def preprocess_function(examples):
-    return tokenizer(examples['EU_Regulation_261_2004'], padding='max_length', truncation=True)
-
-encoded_dataset = dataset.map(preprocess_function, batched=True)
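Note: the deleted script would not have run as written. load_dataset('EU_Regulation_261_2004', ...) expects a dataset script of that name, plain .txt files are normally read with the generic 'text' loader (which exposes a 'text' column rather than 'EU_Regulation_261_2004'), and GPT-2's tokenizer ships without a padding token, so padding='max_length' fails unless one is set. A minimal runnable sketch of the same preprocessing, assuming the same train.txt/test.txt files and an illustrative max_length of 512, would be:

from datasets import load_dataset
from transformers import GPT2Tokenizer

# Read the plain-text regulation files with the generic 'text' loader;
# the train.txt/test.txt names come from the deleted script.
dataset = load_dataset('text', data_files={'train': 'train.txt', 'test': 'test.txt'})

# GPT-2 has no dedicated pad token, so reuse the EOS token for padding.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

def preprocess_function(examples):
    # The 'text' loader puts each line under the 'text' column.
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=512)

encoded_dataset = dataset.map(preprocess_function, batched=True)

Reusing the EOS token as the pad token is the usual workaround for the pretrained GPT-2 checkpoint; the max_length value is an assumption and should match whatever context length the fine-tuning setup uses.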