{ "cells": [ { "cell_type": "markdown", "id": "761319ec-18e8-46c5-b7d6-bfdabc892e38", "metadata": { "id": "761319ec-18e8-46c5-b7d6-bfdabc892e38" }, "source": [ "## Post-process a finetuned LLM\n", "\n", "Test and upload a finetuned language model" ] }, { "cell_type": "code", "execution_count": null, "id": "00e6173c-597b-4eb0-8275-bab2c62d4d48", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "00e6173c-597b-4eb0-8275-bab2c62d4d48", "outputId": "290cab53-6854-41de-a1fa-e0d174adbf59" }, "outputs": [], "source": [ "# Use the %pip magic (not !pip) so packages are installed into the running kernel's environment\n", "%pip install -q -U huggingface_hub peft transformers torch accelerate bitsandbytes" ] }, { "cell_type": "code", "execution_count": null, "id": "7d9e47e4-3adf-4aa4-910c-9f75eb213420", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 145, "referenced_widgets": [ "d27a99c9454a42669954005db152bcdb", "1c8ae80c5e454e938d1010b62895f7a2", "7415c2eaad4d484abec4cd71ec40d9a2", "fe4580bf81f74921b18e4791509daa83", "2f0e202a91c54fafabed7415d48fc73c", "0dded06bf7f34c7f98ec3e848659d24f", "74eaf6da011745e199f5f76d38b0c884", "926085da23864493ad769225649e89ef", "e63dfce513c2412caaf4cb285d60151b", "dfae1a43351f42e2adf5a443a3c1e085", "22bcf39d833d43f2a43e7464d4b2f0fb", "78797db4dd51494db33a6603f2ecd060", "5fd869d422004d758ec2e87779200829", "20040b4fd7f84dd5bf3466ddaf66deea", "e9752af013ef4468bb7d465574533242", "a39961c5f9c3477b83eeb0a1f6c64d0a", "9c9246e66c2c45b18b9c282ecc94e0bf", "86128aadb59e4ca6a293000b9cf39f06", "b4f8140c7a674bde9505db55f6541bfb", "6c7de99ef7ff41d99228de2c2273eece", "14751ec0d56d4125952134e35dda4b36", "8e40dba2d99e4dbd88dba9085e10c36d", "250db2cbdc3b4b15965aa1ec4d34dc24", "d7faa7875fd24fdca89b38318b6624ef", "6706a59598e042749e1f5e9916e81645", "d4eff178b2374120ae54be3616a834af", "f3333f764c9748238ea62f0febb0f7d3", "789e961e93304d6285ed20f58bef4d58", "969061586df34e1bb65cde513250f6a9", "3f54505d762244ff8f0f77f122dacea8", "defe50df284d484f98039fb37eee28d6", "b643b3cfb364440c97f073e8377c19b5" ] }, "id": 
"7d9e47e4-3adf-4aa4-910c-9f75eb213420", "outputId": "8bb2c7e5-c0ee-4715-bba9-725a531d3ea5" }, "outputs": [], "source": [ "from huggingface_hub import notebook_login\n", "\n", "notebook_login()" ] }, { "cell_type": "code", "execution_count": null, "id": "25376737-3af9-4382-9240-9e94050c8ae7", "metadata": { "id": "25376737-3af9-4382-9240-9e94050c8ae7" }, "outputs": [], "source": [ "!git config --global credential.helper store" ] }, { "cell_type": "markdown", "id": "e24d7ddd-044f-4739-8b01-5a1b187150f5", "metadata": { "id": "e24d7ddd-044f-4739-8b01-5a1b187150f5" }, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": null, "id": "bfeb06bd-96d9-4db0-81ee-1053104f915c", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bfeb06bd-96d9-4db0-81ee-1053104f915c", "outputId": "bbba2467-7171-497f-e66e-2bcb6237c183" }, "outputs": [], "source": [ "!nvidia-smi" ] }, { "cell_type": "markdown", "id": "c68a3bba-f374-4ed6-acb6-ebedeb6e29be", "metadata": { "id": "c68a3bba-f374-4ed6-acb6-ebedeb6e29be" }, "source": [ "## Loss curve\n", "\n", "During training, the model converged nicely as follows:\n", "\n", "![image](https://raw.githubusercontent.com/daniel-furman/sft-demos/main/assets/jul_24_23_1_14_00_log_loss_curves_llama-2-70b-dolphin.png)\n" ] }, { "cell_type": "markdown", "id": "d6076740-c324-49a9-841e-30b4878d208b", "metadata": { "id": "d6076740-c324-49a9-841e-30b4878d208b" }, "source": [ "## Basic usage\n", "\n", "With a supervised finetuned (sft) model in hand, we can test it on some basic prompts and then upload it to the Hugging Face hub either as a public or private model repo, depending on the use case." 
] }, { "cell_type": "code", "execution_count": null, "id": "AsrAO-TL25pN", "metadata": { "id": "AsrAO-TL25pN" }, "outputs": [], "source": [ "import torch\n", "from peft import PeftModel, PeftConfig\n", "from transformers import (\n", "    AutoModelForCausalLM,\n", "    AutoTokenizer,\n", "    BitsAndBytesConfig,\n", "    pipeline,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "6b17e2d0-b482-4ebd-a830-325d98318f70", "metadata": { "id": "6b17e2d0-b482-4ebd-a830-325d98318f70" }, "outputs": [], "source": [ "# The PEFT adapter config records which base model the adapter was trained on\n", "peft_model_id = \"dfurman/llama-2-13b-dolphin-peft\"\n", "config = PeftConfig.from_pretrained(peft_model_id)\n", "\n", "# Load the base model in 4-bit NF4 and run compute in bfloat16\n", "bnb_config = BitsAndBytesConfig(\n", "    load_in_4bit=True,\n", "    bnb_4bit_quant_type=\"nf4\",\n", "    bnb_4bit_compute_dtype=torch.bfloat16,\n", ")\n", "\n", "# `token=True` uses the locally stored Hugging Face credentials; it replaces the\n", "# deprecated `use_auth_token` argument\n", "model = AutoModelForCausalLM.from_pretrained(\n", "    config.base_model_name_or_path,\n", "    quantization_config=bnb_config,\n", "    token=True,\n", "    device_map=\"auto\",\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "FE_Tdkin3Whb", "metadata": { "id": "FE_Tdkin3Whb" }, "outputs": [], "source": [ "# Load the base model's tokenizer and reuse EOS as the padding token\n", "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path, use_fast=True)\n", "tokenizer.pad_token = tokenizer.eos_token\n", "\n", "# Wrap the quantized base model with the PEFT (LoRA) adapter weights\n", "model = PeftModel.from_pretrained(model, peft_model_id)\n", "\n", "# Prompt formatter\n", "format_template = \"You are a helpful assistant. 
{query}\\n\"" ] }, { "cell_type": "code", "execution_count": null, "id": "b93b4abc-50e6-4308-8bfb-dfd164aef43f", "metadata": { "id": "b93b4abc-50e6-4308-8bfb-dfd164aef43f" }, "outputs": [], "source": [ "model" ] }, { "cell_type": "code", "execution_count": null, "id": "136794be-1cf3-4478-90df-61507179f076", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "136794be-1cf3-4478-90df-61507179f076", "outputId": "786629c0-3f80-47bc-b853-a2beecf22c88" }, "outputs": [], "source": [ "# Per-GPU memory budget: free memory reported for the current CUDA device, minus 2 GB of headroom\n", "free_in_GB = int(torch.cuda.mem_get_info()[0] / 1024**3)\n", "n_gpus = torch.cuda.device_count()\n", "max_memory = {i: f\"{free_in_GB-2}GB\" for i in range(n_gpus)}\n", "print(\"max memory: \", max_memory)" ] }, { "cell_type": "code", "execution_count": null, "id": "5g-JzwDH3NHi", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "5g-JzwDH3NHi", "outputId": "de3d6611-7771-44d9-8dc4-5dfa72800bda" }, "outputs": [], "source": [ "# First, format the prompt\n", "query = \"Tell me a recipe for vegan banana bread.\"\n", "prompt = format_template.format(query=query)\n", "prompt" ] }, { "cell_type": "code", "execution_count": null, "id": "p-v89cpL7nyN", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "p-v89cpL7nyN", "outputId": "6874c921-ab46-4613-c26e-ce0e29f06f42" }, "outputs": [], "source": [ "# Inference can be done using model.generate\n", "print(\"\\n\\n*** Generate:\")\n", "\n", "# Pass the attention mask explicitly: the pad token is the same as the EOS token,\n", "# so generate() cannot reliably infer the mask from the input ids\n", "encoded = tokenizer(prompt, return_tensors=\"pt\")\n", "input_ids = encoded.input_ids.cuda()\n", "attention_mask = encoded.attention_mask.cuda()\n", "with torch.autocast(\"cuda\", dtype=torch.bfloat16):\n", "    output = model.generate(\n", "        input_ids=input_ids,\n", "        attention_mask=attention_mask,\n", "        max_new_tokens=512,\n", "        do_sample=True,\n", "        temperature=0.7,\n", "        return_dict_in_generate=True,\n", "        eos_token_id=tokenizer.eos_token_id,\n", "        pad_token_id=tokenizer.pad_token_id,\n", "        repetition_penalty=1.2,\n", "    )\n", "\n", "# With return_dict_in_generate=True the generated token ids are under \"sequences\"\n", "print(tokenizer.decode(output[\"sequences\"][0], skip_special_tokens=True))" ] }, { 
"cell_type": "code", "execution_count": null, "id": "1vPU1_9E93Fp", "metadata": { "id": "1vPU1_9E93Fp" }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "GiuA0h8779pf", "metadata": { "id": "GiuA0h8779pf" }, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "A100", "machine_shape": "hm", "provenance": [] }, "interpreter": { "hash": "301faebbd5cea7fd4466786a19f1bea9d8baf657aaca95ef39840c46b8697603" }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "0dded06bf7f34c7f98ec3e848659d24f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a39961c5f9c3477b83eeb0a1f6c64d0a", "placeholder": "", "style": "IPY_MODEL_9c9246e66c2c45b18b9c282ecc94e0bf", "value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
" } }, "14751ec0d56d4125952134e35dda4b36": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "LabelModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6706a59598e042749e1f5e9916e81645", "placeholder": "", "style": "IPY_MODEL_d4eff178b2374120ae54be3616a834af", "value": "Token is valid (permission: write)." } }, "1c8ae80c5e454e938d1010b62895f7a2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_926085da23864493ad769225649e89ef", "placeholder": "", "style": "IPY_MODEL_e63dfce513c2412caaf4cb285d60151b", "value": "