from transformers import (
    AlbertForSequenceClassification,
    AlbertTokenizer,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset

# Load a dataset (replace with your dataset).
# Note: the "text" builder yields only a "text" column; for sequence
# classification, each example must also carry a "label" column.
dataset = load_dataset(
    "text",
    data_files={"train": "path/to/train.txt", "test": "path/to/test.txt"},
)

# Load the tokenizer before it is referenced in the preprocessing function
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")

# Preprocess the dataset (tokenization, formatting, etc.)
def preprocess_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Load the model (adjust num_labels to match your task)
model = AlbertForSequenceClassification.from_pretrained(
    "albert-base-v2", num_labels=2
)

# Define training arguments.
# evaluate_during_training has been removed from TrainingArguments;
# evaluation_strategy="epoch" is the current equivalent.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    logging_dir="./logs",
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
)

# Train the model
trainer.train()

# Save the fine-tuned model and its tokenizer together so the
# checkpoint can be reloaded for inference
model.save_pretrained("path/to/save/model")
tokenizer.save_pretrained("path/to/save/model")
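
# A minimal inference sketch for the saved checkpoint, assuming the
# "path/to/save/model" directory from above; the example sentence is
# illustrative and not part of the original.
import torch

loaded_model = AlbertForSequenceClassification.from_pretrained("path/to/save/model")
loaded_tokenizer = AlbertTokenizer.from_pretrained("path/to/save/model")

inputs = loaded_tokenizer("An example sentence to classify.", return_tensors="pt")
with torch.no_grad():
    logits = loaded_model(**inputs).logits

# With num_labels=2, argmax returns class index 0 or 1
predicted_class = logits.argmax(dim=-1).item()
print(predicted_class)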