lucidmorto committed
Commit 80915e3
Parent(s): 3f7af4c
feat: Upgrade to t5-large model and adjust training params
Upgraded the model from t5-base to t5-large for improved performance. Adjusted training parameters: increased epochs, reduced batch size due to the larger model, increased warmup steps and gradient accumulation, and slightly lowered the learning rate to improve training stability. Adjusted evaluation and checkpoint-saving frequency to align with the updated model and training settings.
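Note on the batch-size change: with the new settings, the effective batch per optimizer step works out to per_device_train_batch_size × gradient_accumulation_steps = 16 × 4 = 64 sequences (assuming a single GPU), which is how the smaller per-device batch for t5-large is compensated.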
- app.py +1 -1
- humanizer.py +11 -11
app.py
CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
-model_name = "t5-base"
+model_name = "t5-large"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
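For context, a minimal sketch of how the upgraded model could be served from the Gradio app; the humanize_text helper and its generation settings are illustrative assumptions, not part of this commit:

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def humanize_text(text: str) -> str:
    # Illustrative inference helper (assumption): tokenize, generate, decode.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    output_ids = model.generate(**inputs, max_length=256, num_beams=4)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

demo = gr.Interface(fn=humanize_text, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()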
humanizer.py
CHANGED
@@ -40,7 +40,7 @@ processed_dataset = {split: data.map(prepare_data) for split, data in dataset.items()}
 logger.info("Dataset prepared.")
 
 # Tokenize the dataset
-model_name = "t5-base"
+model_name = "t5-large"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 def tokenize_function(examples):
@@ -63,25 +63,25 @@ model = T5ForConditionalGeneration.from_pretrained(model_name)
 
 training_args = Seq2SeqTrainingArguments(
     output_dir="./results",
-    num_train_epochs=
-    per_device_train_batch_size=
-    per_device_eval_batch_size=
-    warmup_steps=
+    num_train_epochs=5,  # Increased epochs
+    per_device_train_batch_size=16,  # Reduced batch size due to larger model
+    per_device_eval_batch_size=16,
+    warmup_steps=1000,  # Increased warmup steps
     weight_decay=0.01,
     logging_dir="./logs",
     logging_steps=100,
     evaluation_strategy="steps",
-    eval_steps=
-    save_steps=
-    use_cpu=False,
+    eval_steps=500,
+    save_steps=500,
+    use_cpu=False,
     load_best_model_at_end=True,
     metric_for_best_model="eval_loss",
     greater_is_better=False,
-    fp16=True,
-    gradient_accumulation_steps=
+    fp16=True,
+    gradient_accumulation_steps=4,  # Increased to simulate larger batch sizes
 )
 
-optimizer = torch.optim.AdamW(model.parameters(), lr=
+optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)  # Slightly lower learning rate
 scheduler = get_linear_schedule_with_warmup(
     optimizer,
     num_warmup_steps=500,
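As a rough sketch of how these pieces usually fit together, the custom AdamW/scheduler pair can be handed to Seq2SeqTrainer via its optimizers argument; the num_training_steps value and the train_data/eval_data splits below are placeholders assumed for illustration, not values from this repository:

import torch
from transformers import (
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    T5ForConditionalGeneration,
    get_linear_schedule_with_warmup,
)

model = T5ForConditionalGeneration.from_pretrained("t5-large")

training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,  # effective batch of 16 * 4 = 64 per device
    fp16=True,
)

# Custom optimizer/scheduler pair mirroring the commit; num_training_steps is
# a placeholder, since the real value depends on dataset size, epochs, batch
# size and gradient accumulation.
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=500, num_training_steps=10_000
)

# train_data / eval_data stand in for the tokenized dataset splits prepared
# earlier in humanizer.py; they are not defined in this sketch.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    optimizers=(optimizer, scheduler),  # pass the custom pair instead of the defaults
)
trainer.train()

Passing the (optimizer, scheduler) tuple this way stops the Trainer from building its own default AdamW and linear schedule, so the lowered 3e-5 learning rate actually takes effect during training.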