updated training tasks for big corpus
Browse files
- __pycache__/tasks_v4.cpython-38.pyc +0 -0
- finetune_large_mt5_sentencefix_v4_16.gin +1 -1
- tasks.py +3 -3
- train_large_v4_16.sh +1 -1
__pycache__/tasks_v4.cpython-38.pyc
CHANGED
Binary files a/__pycache__/tasks_v4.cpython-38.pyc and b/__pycache__/tasks_v4.cpython-38.pyc differ
|
|
finetune_large_mt5_sentencefix_v4_16.gin
CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
|
|
12 |
|
13 |
MIXTURE_OR_TASK_NAME = "sentencefix"
|
14 |
TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
|
15 |
-
TRAIN_STEPS =
|
16 |
USE_CACHED_TASKS = False
|
17 |
DROPOUT_RATE = 0.0
|
18 |
RANDOM_SEED = 0
|
|
|
12 |
|
13 |
MIXTURE_OR_TASK_NAME = "sentencefix"
|
14 |
TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
|
15 |
+
TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
|
16 |
USE_CACHED_TASKS = False
|
17 |
DROPOUT_RATE = 0.0
|
18 |
RANDOM_SEED = 0
|
tasks.py
CHANGED
@@ -9,9 +9,9 @@ import t5
|
|
9 |
import tensorflow.compat.v1 as tf
|
10 |
|
11 |
tsv_path = {
|
12 |
-
"train": "gs://nb-t5x/
|
13 |
-
"validation": "gs://nb-t5x/
|
14 |
-
"test": "gs://nb-t5x/
|
15 |
}
|
16 |
|
17 |
vocabulary = seqio.SentencePieceVocabulary(
|
|
|
9 |
import tensorflow.compat.v1 as tf
|
10 |
|
11 |
tsv_path = {
|
12 |
+
"train": "gs://nb-t5x-us-central2/corpus_big/train.tsv",
|
13 |
+
"validation": "gs://nb-t5x-us-central2/corpus_big/eval.tsv",
|
14 |
+
"test": "gs://nb-t5x-us-central2/corpus_big/test.tsv"
|
15 |
}
|
16 |
|
17 |
vocabulary = seqio.SentencePieceVocabulary(
|
train_large_v4_16.sh
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
|
2 |
T5X_DIR="../../t5x" # directory where the t5x is cloned.
|
3 |
-
MODEL_DIR="gs://nb-t5x-us-central2/
|
4 |
export PYTHONPATH=${PROJECT_DIR}
|
5 |
|
6 |
python3 ${T5X_DIR}/t5x/train.py \
|
|
|
1 |
PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
|
2 |
T5X_DIR="../../t5x" # directory where the t5x is cloned.
|
3 |
+
MODEL_DIR="gs://nb-t5x-us-central2/model_mT5X_large_16_e"
|
4 |
export PYTHONPATH=${PROJECT_DIR}
|
5 |
|
6 |
python3 ${T5X_DIR}/t5x/train.py \
|