Spaces: Runtime error
stakelovelace committed • Commit 339b8e7 • 1 parent: 457f3a4
commit from tesla
app.py CHANGED
@@ -33,7 +33,7 @@ def load_data_and_config(data_path):
 
 def train_model(model, tokenizer, data, device):
     """Trains the model using the Hugging Face Trainer API."""
-    inputs = [tokenizer(d['text'], max_length=
+    inputs = [tokenizer(d['text'], max_length=256, truncation=True, padding='max_length', return_tensors="pt") for d in data]
     dataset = Dataset.from_dict({
         'input_ids': [x['input_ids'].squeeze() for x in inputs],
         'labels': [x['input_ids'].squeeze() for x in inputs]
@@ -42,9 +42,9 @@ def train_model(model, tokenizer, data, device):
     training_args = TrainingArguments(
         output_dir='./results',
         num_train_epochs=3,
-        per_device_train_batch_size=
-        gradient_accumulation_steps=
-        fp16=True, # Enable mixed precision
+        per_device_train_batch_size=1,
+        gradient_accumulation_steps=2,
+        # fp16=True, # Enable mixed precision
         warmup_steps=500,
         weight_decay=0.01,
         logging_dir='./logs',
@@ -70,8 +70,9 @@ def main(api_name, base_url):
     # Load the configuration for a specific model
     config = AutoConfig.from_pretrained('google/codegemma-2b')
     # Update the activation function
-    config.hidden_act = '
-
+    config.hidden_act = '' # Set to use approximate GeLU gelu_pytorch_tanh
+    config.hidden_activation = 'gelu' # Set to use GeLU
+
     model = AutoModelForCausalLM.from_pretrained('google/codegemma-2b', is_decoder=True)
     #model = BertLMHeadModel.from_pretrained('google/codegemma-2b', is_decoder=True)
     # Example assuming you have a prepared dataset for classification