{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "id": "2eSvM9zX_2d3" }, "outputs": [], "source": [ "%%capture\n", "!pip install unsloth\n", "# Also get the latest nightly Unsloth!\n", "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n", "\n", "# Install Flash Attention 2 for softcapping support\n", "import torch\n", "if torch.cuda.get_device_capability()[0] >= 8:\n", " !pip install --no-deps packaging ninja einops \"flash-attn>=2.6.3\"" ] }, { "cell_type": "markdown", "metadata": { "id": "r2v_X2fA0Df5" }, "source": [ "* We support Llama, Mistral, Phi-3, Gemma, Yi, DeepSeek, Qwen, TinyLlama, Vicuna, Open Hermes etc\n", "* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.\n", "* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.\n", "* [**NEW**] We make Gemma-2 9b / 27b **2x faster**! See our [Gemma-2 9b notebook](https://colab.research.google.com/drive/1vIrqH5uYDQwsJ4-OO3DErvuv4pBgVwk4?usp=sharing)\n", "* [**NEW**] To finetune and auto export to Ollama, try our [Ollama notebook](https://colab.research.google.com/drive/1WZDi7APtQ9VsvOrQSSC5DDtxq159j8iZ?usp=sharing)\n", "* [**NEW**] We make Mistral NeMo 12B 2x faster and fit in under 12GB of VRAM! [Mistral NeMo notebook](https://colab.research.google.com/drive/17d3U-CAIwzmbDRqbZ9NnpHxCkmXB6LZ0?usp=sharing)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 304, "referenced_widgets": [ "a7d0f0d1ae2946919a4624afe63955ba", "360a61aedcbc4a1dae69296db755834d", "fc8c43fb06f94bbc92c15da546a0d8bd", "31535774b35744aa941dcc1d9f38ab3c", "6374dd2369534e179fb17e1d67ff979a", "8787ae2dd4f14eb8bc32752c8005f0dd", "5ecb5e170c3f48599b85ca722cd43ef6", "a2c78f2c126541e4b2a97be71ead0c79", "3663163dbd9a40e2921975f19cd71eda", "c3adeda09c1843778efb13cd7c22658b", "b4e3e9d17dec4594966adedaf0118c93", "fa70c9f2a7d24836a2ceb5cebdfbd9a4", "63033b1264fa4ef79aab3101450f1ab9", "39a20c40ae4f4c11a95c7d66cabbc903", "86a47700ac6a4b27976509c0b0025e82", "926b19baa5ab462ea153546141c300c0", "9220e848ab2a453091e1037f7e5c238f", "dd2f632d1d524ff799801c723ac169c8", "4395d5d9eaf34a768373e771caf6b604", "002ea1c177e740898fcb02ea91c50f23", "429c6801e21b40878a4e6ffadacc764a", "dca0fa0c2aa74621a34747f8036a9c03", "9321e15c3653489a87117e882eb5a6f7", "6d3c7772b4c9461d93eeb5938655997d", "73ec072c24774e539a83a7e0b2b5d9d6", "d27de1b0b2e44fa18704cf3c5dbd2477", "324a061e42e046fc947a65774ce9ae30", "71f3ee0abf06493d8325f5f8db0d4de3", "29b4146ef6d3464688600458d84f03ed", "d14baad345f445f8ae98f2b180611b6c", "2fd205a5971e4f6890b6b90d2a1d69fd", "f00ae62ddd8949478892284992923099", "8449f40ccd2e4ced89d684d5e1f69f1c", "7a401485d06e48118fd61f2d1bf47c45", "e596983b4b40476aa812f796ae84b95a", "82c8463282084d2882bf30906bacc139", "849082bd74234e64a125bd5112715d81", "d8ff1d43870342868c6d9e445582caea", "ec946b5b32ba49cf90bb4a8fb3921876", "006a35217eaa4bc5ac50b0976f54fed0", "e4c6000455444f98b57c66daa27b22f4", "75297a92240548c3b6f969a66e35e392", "718fb4c6633945fd859044f9e041effa", "e2e2ebb66c4c4ec79afb24f436bea0c6", "16818f8211624ab38d9798b97d775b7e", "99595f2bfb9342eb8f8490ad0e0bfd1a", "969f0865119f460c863682ef1e2745f3", "4d02c65a677f4976a841545314ca28da", "c696e50b3f9e48d0b03d790715985155", "abbbfcda624c409f8f8589904dbbdd27", "541d6cf97e194aea9f727213309c273d", "75f4fceddf1b457bb2b5acac846e4146", "f015660f44e14c498d1ad460ca46a46c", 
"5cf2ca95dbfb43e98c10463c05c34d45", "d8e36e25f33447cbb06529e2d905c2c1" ] }, "id": "QmUBVEnvCDJv", "outputId": "27e0e3f5-d799-4ab9-fdc7-a0a0dd41c12b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "==((====))== Unsloth 2024.9.post3: Fast Llama patching. Transformers = 4.45.1.\n", " \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.\n", "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a7d0f0d1ae2946919a4624afe63955ba", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/2.47G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fa70c9f2a7d24836a2ceb5cebdfbd9a4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "generation_config.json: 0%| | 0.00/184 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9321e15c3653489a87117e882eb5a6f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/54.6k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7a401485d06e48118fd61f2d1bf47c45", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/9.09M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "16818f8211624ab38d9798b97d775b7e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/454 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n", "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", "load_in_4bit = False # Use 4bit quantization to reduce memory usage. 
Can be False.\n", "\n", "# # 4bit pre-quantized models we support for 4x faster downloading + no OOMs.\n", "# fourbit_models = [\n", "#     \"unsloth/Meta-Llama-3.1-8B-bnb-4bit\",      # Llama-3.1, trained on 15 trillion tokens, 2x faster!\n", "#     \"unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit\",\n", "#     \"unsloth/Meta-Llama-3.1-70B-bnb-4bit\",\n", "#     \"unsloth/Meta-Llama-3.1-405B-bnb-4bit\",    # We also uploaded 4bit for 405b!\n", "#     \"unsloth/Mistral-Nemo-Base-2407-bnb-4bit\", # New Mistral 12b 2x faster!\n", "#     \"unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit\",\n", "#     \"unsloth/mistral-7b-v0.3-bnb-4bit\",        # Mistral v3 2x faster!\n", "#     \"unsloth/mistral-7b-instruct-v0.3-bnb-4bit\",\n", "#     \"unsloth/Phi-3-mini-4k-instruct\",          # Phi-3 2x faster!\n", "#     \"unsloth/Phi-3-medium-4k-instruct\",\n", "#     \"unsloth/gemma-2-9b-bnb-4bit\",\n", "#     \"unsloth/gemma-2-27b-bnb-4bit\",            # Gemma 2x faster!\n", "#     \"unsloth/gemma-2-2b-it\",                   # New small Gemma model!\n", "# ] # More models at https://huggingface.co/unsloth\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", "    model_name = \"unsloth/Llama-3.2-1B-Instruct\",\n", "    max_seq_length = max_seq_length,\n", "    dtype = dtype,\n", "    load_in_4bit = load_in_4bit,\n", "    # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "SXd9bTZd1aaL" }, "source": [ "We now add LoRA adapters so we only need to update 1 to 10% of all parameters!" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6bZsfBuZDeCL", "outputId": "da119557-726e-4605-ea13-dd4cd0ec448c" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.9.post3 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", "    model,\n", "    r = 16, # Choose any number > 0! Suggested: 8, 16, 32, 64, 128\n", "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n", "    lora_alpha = 16,\n", "    lora_dropout = 0, # Supports any, but = 0 is optimized\n", "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n", "    # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n", "    use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n", "    random_state = 3407,\n", "    use_rslora = False,  # We support rank stabilized LoRA\n", "    loftq_config = None, # And LoftQ\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "vITh0KVJ10qX" }, "source": [ "\n", "### Data Prep\n", "This example is based on the Alpaca dataset from [yahma](https://huggingface.co/datasets/yahma/alpaca-cleaned), a filtered version of the original 52K-example [Alpaca dataset](https://crfm.stanford.edu/2023/03/13/alpaca.html); below we load our own `restructured_dataset.json` instead and format it with the same Alpaca-style prompt. You can replace this code section with your own data prep.\n", "\n", "**[NOTE]** To train only on completions (ignoring the user's input), read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).\n", "\n", "**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!!
Otherwise you'll get infinite generations!\n", "\n", "If you want to use the `llama-3` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1XamvWYinY6FOSX9GLvnqSjjsNflxdhNc?usp=sharing).\n", "\n", "For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing)." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "LjY75GoYUCB8" }, "outputs": [], "source": [ "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", "    # Our restructured dataset stores the Alpaca-style fields under\n", "    # the keys \"prompt\", \"query\", and \"response\".\n", "    instructions = examples[\"prompt\"]\n", "    inputs       = examples[\"query\"]\n", "    outputs      = examples[\"response\"]\n", "    texts = []\n", "    for instruction, input, output in zip(instructions, inputs, outputs):\n", "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", "        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", "        texts.append(text)\n", "    return { \"text\" : texts, }\n", "pass\n", "\n", "# from datasets import load_dataset\n", "# dataset = load_dataset(\"yahma/alpaca-cleaned\", split = \"train\")\n", "# dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xUTnbqJUFDXc", "outputId": "2799c1c4-388e-47a9-8fc4-0a703734b038" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training dataset size: 2256\n", "Test dataset size: 565\n" ] } ], "source": [ "from datasets import load_dataset\n", "\n", "# Load the custom dataset (restructured_dataset.json must already be uploaded to /content)\n", "dataset = load_dataset(\"/content\", data_files=\"restructured_dataset.json\")\n", "\n", "# Split the dataset into 80% training and 20% test\n", "split_ratio = 0.8\n", "train_test_split = dataset[\"train\"].train_test_split(test_size=1 - split_ratio, seed=42) # Set seed for reproducibility\n", "\n", "# Get the train and test datasets\n", "train_dataset = train_test_split[\"train\"]\n", "test_dataset = train_test_split[\"test\"]\n", "\n", "# Output the sizes to verify\n", "print(f\"Training dataset size: {len(train_dataset)}\")\n", "print(f\"Test dataset size: {len(test_dataset)}\")\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "cgFqwIGpDAiY" }, "outputs": [], "source": [ "train_dataset = train_dataset.map(formatting_prompts_func, batched = True,)\n", "test_dataset = test_dataset.map(formatting_prompts_func, batched = True,)\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "83mlFwUChiz7" }, "outputs": [], "source": [ "# train_dataset['text']" ] }, { "cell_type": "markdown", "metadata": { "id": "idAEIeSQ3xdS" }, "source": [ "\n", "### Train the model\n", "Now let's use Hugging Face TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We train for 2 full epochs here via `num_train_epochs = 2`; for a quick test run, swap in `max_steps = 60` instead. We also support TRL's `DPOTrainer`! An optional sanity-check cell follows before the trainer is built."
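] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Optional sanity check before training: every formatted `text` field must end with the EOS token, otherwise generations never stop. The second half of the cell is only a sketch of the completions-only training mentioned in the data-prep notes, using TRL's `DataCollatorForCompletionOnlyLM`; that collator requires `packing = False`, so it is **not** passed to the trainer below." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sanity check: every formatted example must end with EOS_TOKEN.\n", "example = train_dataset[0][\"text\"]\n", "print(example[:300]) # Peek at the start of one formatted example.\n", "assert example.endswith(EOS_TOKEN), \"EOS_TOKEN missing - generations would never stop!\"\n", "\n", "# Sketch only: mask prompt tokens so the loss is computed on responses alone.\n", "# It requires packing = False in SFTTrainer, so it is not used in this run.\n", "from trl import DataCollatorForCompletionOnlyLM\n", "collator = DataCollatorForCompletionOnlyLM(response_template = \"### Response:\", tokenizer = tokenizer)"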
] }, { "cell_type": "code", "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "95_Nn-89DhsL", "outputId": "f144252e-5cef-46db-e040-5e657772ef7f" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", "  warnings.warn(\n", "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", "  warnings.warn(\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "from unsloth import is_bfloat16_supported\n", "\n", "trainer = SFTTrainer(\n", "    model = model,\n", "    tokenizer = tokenizer,\n", "    train_dataset = train_dataset,\n", "    eval_dataset = test_dataset,\n", "    dataset_text_field = \"text\",\n", "    max_seq_length = max_seq_length,\n", "    dataset_num_proc = 2,\n", "    packing = True, # Can make training 5x faster for short sequences.\n", "    args = TrainingArguments(\n", "        per_device_train_batch_size = 2,\n", "        per_device_eval_batch_size = 4,\n", "        gradient_accumulation_steps = 4,\n", "        warmup_steps = 5,\n", "        num_train_epochs = 2, # Two full passes over the training set.\n", "        # max_steps = 60, # Uncomment (and drop num_train_epochs) for a quick test run.\n", "        learning_rate = 2e-4,\n", "        fp16 = not is_bfloat16_supported(),\n", "        bf16 = is_bfloat16_supported(),\n", "        logging_steps = 1,\n", "        optim = \"adamw_8bit\",\n", "        weight_decay = 0.01,\n", "        lr_scheduler_type = \"linear\",\n", "        seed = 3407,\n", "        output_dir = \"outputs\",\n", "        evaluation_strategy = \"steps\", # Deprecated name; transformers >= 4.46 uses eval_strategy (hence the warnings above).\n", "    ),\n", ")" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "hkC1v_kaBUiW", "outputId": "ee9a3108-d05a-48a5-92a9-888257ea66ef" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n", "   \\\\   /|    Num examples = 277 | Num Epochs = 2\n", "O^O/ \\_/ \\    Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\        /    Total batch size = 8 | Total steps = 68\n", " \"-____-\"     Number of trainable parameters = 11,272,192\n" ] }, { "data": { "text/html": [ "\n", "
Step | \n", "Training Loss | \n", "Validation Loss | \n", "
---|---|---|
1 | \n", "0.750300 | \n", "0.767613 | \n", "
2 | \n", "0.709100 | \n", "0.760781 | \n", "
3 | \n", "0.685500 | \n", "0.748784 | \n", "
4 | \n", "0.652100 | \n", "0.731342 | \n", "
5 | \n", "0.711400 | \n", "0.714547 | \n", "
6 | \n", "0.774900 | \n", "0.693540 | \n", "
7 | \n", "0.665600 | \n", "0.674969 | \n", "
8 | \n", "0.735700 | \n", "0.656129 | \n", "
9 | \n", "0.620000 | \n", "0.638963 | \n", "
10 | \n", "0.591200 | \n", "0.621351 | \n", "
11 | \n", "0.650800 | \n", "0.604045 | \n", "
12 | \n", "0.587300 | \n", "0.587853 | \n", "
13 | \n", "0.610400 | \n", "0.573831 | \n", "
14 | \n", "0.544500 | \n", "0.560170 | \n", "
15 | \n", "0.590700 | \n", "0.545877 | \n", "
16 | \n", "0.492100 | \n", "0.534258 | \n", "
17 | \n", "0.520900 | \n", "0.519963 | \n", "
18 | \n", "0.527900 | \n", "0.505541 | \n", "
19 | \n", "0.539500 | \n", "0.492883 | \n", "
20 | \n", "0.508900 | \n", "0.480199 | \n", "
21 | \n", "0.462900 | \n", "0.467022 | \n", "
22 | \n", "0.441700 | \n", "0.453975 | \n", "
23 | \n", "0.418200 | \n", "0.442243 | \n", "
24 | \n", "0.432400 | \n", "0.430087 | \n", "
25 | \n", "0.419300 | \n", "0.418594 | \n", "
26 | \n", "0.395600 | \n", "0.407527 | \n", "
27 | \n", "0.387800 | \n", "0.396506 | \n", "
28 | \n", "0.450600 | \n", "0.384659 | \n", "
29 | \n", "0.370400 | \n", "0.373602 | \n", "
30 | \n", "0.364500 | \n", "0.363078 | \n", "
31 | \n", "0.332300 | \n", "0.353667 | \n", "
32 | \n", "0.305700 | \n", "0.344543 | \n", "
33 | \n", "0.322600 | \n", "0.335432 | \n", "
34 | \n", "0.338900 | \n", "0.327199 | \n", "
35 | \n", "0.331000 | \n", "0.318517 | \n", "
36 | \n", "0.349100 | \n", "0.310108 | \n", "
37 | \n", "0.252700 | \n", "0.303383 | \n", "
38 | \n", "0.294900 | \n", "0.297450 | \n", "
39 | \n", "0.247400 | \n", "0.289259 | \n", "
40 | \n", "0.242800 | \n", "0.281499 | \n", "
41 | \n", "0.254700 | \n", "0.275865 | \n", "
42 | \n", "0.259000 | \n", "0.270416 | \n", "
43 | \n", "0.238900 | \n", "0.264625 | \n", "
44 | \n", "0.239500 | \n", "0.258969 | \n", "
45 | \n", "0.223000 | \n", "0.253462 | \n", "
46 | \n", "0.207800 | \n", "0.248274 | \n", "
47 | \n", "0.251200 | \n", "0.242153 | \n", "
48 | \n", "0.200100 | \n", "0.237188 | \n", "
49 | \n", "0.214300 | \n", "0.232814 | \n", "
50 | \n", "0.199100 | \n", "0.228829 | \n", "
51 | \n", "0.226400 | \n", "0.225165 | \n", "
52 | \n", "0.197800 | \n", "0.222183 | \n", "
53 | \n", "0.222200 | \n", "0.219091 | \n", "
54 | \n", "0.222400 | \n", "0.215774 | \n", "
55 | \n", "0.193700 | \n", "0.212546 | \n", "
56 | \n", "0.205900 | \n", "0.209754 | \n", "
57 | \n", "0.216200 | \n", "0.207039 | \n", "
58 | \n", "0.196500 | \n", "0.204481 | \n", "
59 | \n", "0.207200 | \n", "0.202018 | \n", "
60 | \n", "0.176200 | \n", "0.199767 | \n", "
61 | \n", "0.160900 | \n", "0.197782 | \n", "
62 | \n", "0.169300 | \n", "0.195964 | \n", "
63 | \n", "0.185100 | \n", "0.194448 | \n", "
64 | \n", "0.182700 | \n", "0.193181 | \n", "
65 | \n", "0.171500 | \n", "0.192146 | \n", "
66 | \n", "0.164900 | \n", "0.191384 | \n", "
67 | \n", "0.192900 | \n", "0.190831 | \n", "
68 | \n", "0.209000 | \n", "0.190555 | \n", "
"
],
"text/plain": [
"