{ "cells": [ { "cell_type": "markdown", "id": "873f35d6-df57-48e0-a60a-45d3c9e15c96", "metadata": {}, "source": [ "# Bonito: Synthetic Data Generation" ] }, { "cell_type": "markdown", "id": "c283bd6d-ea2f-4a6f-99ac-8887e7621bcb", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:32:57.354413Z", "iopub.status.busy": "2024-09-17T08:32:57.354090Z", "iopub.status.idle": "2024-09-17T08:32:57.357571Z", "shell.execute_reply": "2024-09-17T08:32:57.356909Z", "shell.execute_reply.started": "2024-09-17T08:32:57.354388Z" } }, "source": [ "#### Environment Setup" ] }, { "cell_type": "raw", "id": "df65c34e-9080-4e11-9b21-12fa6f459736", "metadata": {}, "source": [ "pip freeze >> requirements_bonito.txt" ] }, { "cell_type": "raw", "id": "3502e6ea-1ce9-497a-8bfa-dfc6ff215d07", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:26:59.911435Z", "iopub.status.busy": "2024-09-17T08:26:59.911244Z", "iopub.status.idle": "2024-09-17T08:26:59.913754Z", "shell.execute_reply": "2024-09-17T08:26:59.913427Z", "shell.execute_reply.started": "2024-09-17T08:26:59.911420Z" } }, "source": [ "conda create -n bonito python=3.9\n", "conda activate bonito\n", "pip install -e ." ] }, { "cell_type": "raw", "id": "0940de6a-0e05-41f0-9d72-2f206bf77ff0", "metadata": {}, "source": [ "pip install autoawq\n", "pip install flash-attn==2.5.6 --no-build-isolation" ] }, { "cell_type": "code", "execution_count": 18, "id": "b33c2d03-9cb9-4f31-bcae-d81941486e25", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:33:56.866768Z", "iopub.status.busy": "2024-09-17T08:33:56.866424Z", "iopub.status.idle": "2024-09-17T08:33:56.872656Z", "shell.execute_reply": "2024-09-17T08:33:56.872137Z", "shell.execute_reply.started": "2024-09-17T08:33:56.866742Z" } }, "outputs": [ { "data": { "text/plain": [ "('2.4.0+cu121', '4.44.2', '0.6.1.post2', '0.2.6')" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import transformers\n", "import torch\n", "import vllm\n", "import awq\n", "\n", "torch.__version__, transformers.__version__, vllm.__version__, awq.__version__" ] }, { "cell_type": "markdown", "id": "987a88f2-2240-47ab-b0b7-d1589a6da59e", "metadata": {}, "source": [ "#### Quantized Bonito Wrapper\n", "This is a simplified quantized bonito class to generate a single synthetic input-output instruction for a given text and task type.\n", "This code uses huggingface `transformers` library for generation.\n", "For complete functionality and faster generations, we recommend using the `Bonito` class from the package." 
] }, { "cell_type": "code", "execution_count": 13, "id": "20855b3e-e864-4875-9135-bf1242a99242", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:26:25.162113Z", "iopub.status.busy": "2024-09-17T08:26:25.161926Z", "iopub.status.idle": "2024-09-17T08:26:25.167686Z", "shell.execute_reply": "2024-09-17T08:26:25.166893Z", "shell.execute_reply.started": "2024-09-17T08:26:25.162099Z" } }, "outputs": [], "source": [ "from typing import Dict, Optional, Union\n", "from datasets import Dataset\n", "\n", "SHORTFORM_TO_FULL_TASK_TYPES = {\n", " \"exqa\": \"extractive question answering\",\n", " \"mcqa\": \"multiple-choice question answering\",\n", " \"qg\": \"question generation\",\n", " \"qa\": \"question answering without choices\",\n", " \"ynqa\": \"yes-no question answering\",\n", " \"coref\": \"coreference resolution\",\n", " \"paraphrase\": \"paraphrase generation\",\n", " \"paraphrase_id\": \"paraphrase identification\",\n", " \"sent_comp\": \"sentence completion\",\n", " \"sentiment\": \"sentiment\",\n", " \"summarization\": \"summarization\",\n", " \"text_gen\": \"text generation\",\n", " \"topic_class\": \"topic classification\",\n", " \"wsd\": \"word sense disambiguation\",\n", " \"te\": \"textual entailment\",\n", " \"nli\": \"natural language inference\",\n", "}\n", "\n", "class AbstractBonito:\n", " def _prepare_bonito_input(\n", " self, context_dataset: Dataset, task_type: str, context_col: str, **kwargs\n", " ) -> Dataset:\n", " \"\"\"\n", " Prepares the input for the Bonito model.\n", "\n", " This method takes a context dataset, a task type, and a context\n", " column name, and prepares the dataset for the Bonito model.\n", " If the task type is not recognized, it raises a ValueError.\n", "\n", " Args:\n", " context_dataset (Dataset): The dataset that provides the\n", " context for the task.\n", " task_type (str): The type of the task. This can be a\n", " short form or a full form. If the task type is not\n", " recognized, a ValueError is raised.\n", " context_col (str): The name of the column in the dataset\n", " that provides the context for the task.\n", " **kwargs: Additional keyword arguments.\n", "\n", " Returns:\n", " Dataset: The prepared dataset for the Bonito model.\n", " \"\"\"\n", " # get the task type name\n", " if task_type in SHORTFORM_TO_FULL_TASK_TYPES.values():\n", " full_task_type = task_type\n", " elif task_type in SHORTFORM_TO_FULL_TASK_TYPES:\n", " full_task_type = SHORTFORM_TO_FULL_TASK_TYPES[task_type]\n", " else:\n", " raise ValueError(f\"Task type {task_type} not recognized\")\n", "\n", " def process(example):\n", " input_text = \"<|tasktype|>\\n\" + full_task_type.strip()\n", " input_text += (\n", " \"\\n<|context|>\\n\" + example[context_col].strip() + \"\\n<|task|>\\n\"\n", " )\n", " return {\n", " \"input\": input_text,\n", " }\n", "\n", " return context_dataset.map(\n", " process,\n", " remove_columns=context_dataset.column_names,\n", " num_proc=kwargs.get(\"num_proc\", 1),\n", " )\n", "\n", " def _postprocess_dataset(\n", " self, synthetic_dataset: Dataset, context_col: str, **kwargs\n", " ) -> Dataset:\n", " \"\"\"\n", " Post-processes the synthetic dataset.\n", "\n", " This method takes a synthetic dataset and a context column\n", " name, and post-processes the dataset. 
It filters out\n", " examples where the prediction does not contain exactly two\n", " parts separated by \"<|pipe|>\", and then maps each example to a\n", " new format where the context is inserted into the first part of\n", " the prediction and the second part of the prediction is used as\n", " the output.\n", "\n", " Args:\n", " synthetic_dataset (Dataset): The synthetic dataset to be\n", " post-processed.\n", " context_col (str): The name of the column in the dataset\n", " that provides the context for the tasks.\n", " **kwargs: Additional keyword arguments.\n", "\n", " Returns:\n", " Dataset: The post-processed synthetic dataset.\n", " \"\"\"\n", " synthetic_dataset = synthetic_dataset.filter(\n", " lambda example: len(example[\"prediction\"].split(\"<|pipe|>\")) == 2\n", " )\n", "\n", " def process(example):\n", " pair = example[\"prediction\"].split(\"<|pipe|>\")\n", " context = example[context_col].strip()\n", " return {\n", " \"input\": pair[0].strip().replace(\"{{context}}\", context),\n", " \"output\": pair[1].strip(),\n", " }\n", "\n", " synthetic_dataset = synthetic_dataset.map(\n", " process,\n", " remove_columns=synthetic_dataset.column_names,\n", " num_proc=kwargs.get(\"num_proc\", 1),\n", " )\n", "\n", " return synthetic_dataset\n" ] },
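{ "cell_type": "markdown", "id": "1b2c3d4e-0001-4a5b-8c9d-0e1f2a3b4c5d", "metadata": {}, "source": [ "A tiny self-contained check of the two helpers above. This is a sketch: the example strings are hypothetical and merely mimic a well-formed Bonito completion." ] }, { "cell_type": "code", "execution_count": null, "id": "2c3d4e5f-0002-4b6c-9d0e-1f2a3b4c5d6e", "metadata": {}, "outputs": [], "source": [ "# Sketch: exercise the prompt builder and the post-processor on toy data.\n", "from datasets import Dataset\n", "\n", "helper = AbstractBonito()\n", "toy = Dataset.from_list([{\"input\": \"Cats are mammals.\"}])\n", "# builds the <|tasktype|>/<|context|>/<|task|> prompt for the model\n", "prepared = helper._prepare_bonito_input(toy, \"nli\", context_col=\"input\")\n", "print(prepared[0][\"input\"])\n", "\n", "# a fake completion: instruction and answer separated by <|pipe|>\n", "fake = Dataset.from_list([{\"input\": \"Cats are mammals.\", \"prediction\": \"Premise: {{context}} Is it true that cats are animals? <|pipe|> Yes\"}])\n", "print(helper._postprocess_dataset(fake, context_col=\"input\").to_list())" ] },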
{ "cell_type": "code", "execution_count": 4, "id": "ea7d9840-162e-4c1e-b25d-354b1e110bdc", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:19:16.539490Z", "iopub.status.busy": "2024-09-17T08:19:16.539225Z", "iopub.status.idle": "2024-09-17T08:19:16.807696Z", "shell.execute_reply": "2024-09-17T08:19:16.807297Z", "shell.execute_reply.started": "2024-09-17T08:19:16.539469Z" } }, "outputs": [], "source": [ "from typing import List, Dict\n", "\n", "import torch\n", "from datasets import Dataset\n", "from awq import AutoAWQForCausalLM\n", "from transformers import AutoTokenizer\n", "\n", "class AWQBonito(AbstractBonito):\n", " def __init__(self, model_name_or_path):\n", " self.model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True, torch_dtype=torch.float16)\n", " self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)\n", "\n", " def generate_task(\n", " self,\n", " unannotated_paragraph: str,\n", " task_type: str,\n", " sampling_params: dict,\n", " context_col=\"input\"\n", " ) -> Dict:\n", " \"\"\"\n", " Generates a synthetic instruction tuning pair using the quantized Bonito model.\n", " This method takes an unannotated text, a task type, and sampling parameters,\n", " and generates a synthetic input-output pair.\n", "\n", " Args:\n", " unannotated_paragraph (str): The unannotated text or paragraph.\n", " task_type (str): The type of the task. This can be a\n", " short form or a full form.\n", " sampling_params (dict): The parameters for\n", " sampling.\n", " context_col (str): The name of the column that holds\n", " the context. Defaults to \"input\".\n", "\n", " Returns:\n", " Dict: The synthetic input-output pair for the task type.\n", " \"\"\"\n", "\n", " text_dataset = Dataset.from_list([{context_col: unannotated_paragraph}])\n", "\n", " processed_dataset = self._prepare_bonito_input(\n", " text_dataset, task_type, context_col=context_col\n", " )\n", "\n", " outputs = self._generate_text(processed_dataset[\"input\"], sampling_params)\n", " examples = []\n", " for i, example in enumerate(text_dataset.to_list()):\n", " output = outputs[i]\n", " example[\"prediction\"] = output.strip()\n", " examples.append(example)\n", "\n", " synthetic_dataset = Dataset.from_list(examples)\n", "\n", " # filter out the examples that cannot be parsed\n", " # (indexing [0] assumes the single generation parsed successfully)\n", " synthetic_dataset_dict = self._postprocess_dataset(\n", " synthetic_dataset, context_col=context_col\n", " ).to_list()[0]\n", "\n", " return synthetic_dataset_dict\n", "\n", " def _generate_text(\n", " self,\n", " dataset: Dataset,\n", " sampling_params: dict,\n", " ) -> List[str]:\n", " \"\"\"\n", " Generate text using the Hugging Face `transformers` generate function.\n", "\n", " This method takes a dataset of prompts, encodes them,\n", " generates text using the model, decodes the generated\n", " text, and appends it to a list.\n", "\n", " Args:\n", " dataset (Dataset): A dataset containing prompts for text generation.\n", " sampling_params (dict): Parameters for sampling during generation.\n", "\n", " Returns:\n", " List[str]: A list of generated texts corresponding to the prompts.\n", " \"\"\"\n", " generated_texts = []\n", "\n", " for prompt in dataset:\n", " input_ids = self.tokenizer.encode(prompt, return_tensors=\"pt\")\n", " input_ids = input_ids.cuda()\n", "\n", " output = self.model.generate(input_ids, do_sample=True, **sampling_params)\n", "\n", " # decode only the newly generated tokens, not the prompt\n", " generated_text = self.tokenizer.decode(\n", " output[0][len(input_ids[0]) :], skip_special_tokens=True\n", " )\n", " generated_texts.append(generated_text)\n", "\n", " return generated_texts" ] }, { "cell_type": "code", "execution_count": 5, "id": "7f773af6-b700-4368-aaed-6bda08560e16", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:19:17.123688Z", "iopub.status.busy": "2024-09-17T08:19:17.123507Z", "iopub.status.idle": "2024-09-17T08:19:17.127238Z", "shell.execute_reply": "2024-09-17T08:19:17.126899Z", "shell.execute_reply.started": "2024-09-17T08:19:17.123674Z" } }, "outputs": [], "source": [ "from datasets import Dataset\n", "from vllm import LLM, SamplingParams\n", "\n", "class VLLMBonito(LLM, AbstractBonito):\n", " \n", " def generate_tasks(\n", " self,\n", " text_dataset: Dataset,\n", " context_col: str,\n", " task_type: str,\n", " sampling_params: SamplingParams,\n", " **kwargs,\n", " ):\n", " \"\"\"\n", " Generates tasks using the Bonito model.\n", "\n", " This method takes a text dataset, a context column name,\n", " a task type, and sampling parameters, and generates tasks\n", " using the Bonito model. It processes the input dataset,\n", " generates outputs, collects multiple generations into\n", " one dataset object, and filters out the examples that\n", " cannot be parsed.\n", "\n", " Args:\n", " text_dataset (Dataset): The dataset that provides the text\n", " for the tasks.\n", " context_col (str): The name of the column in the dataset\n", " that provides the context for the tasks.\n",
 " task_type (str): The type of the tasks. This can be a\n", " short form or a full form.\n", " sampling_params (SamplingParams): The parameters for\n", " sampling.\n", " **kwargs: Additional keyword arguments.\n", "\n", " Returns:\n", " Dataset: The synthetic dataset with the generated tasks.\n", " \"\"\"\n", " processed_dataset = self._prepare_bonito_input(\n", " text_dataset, task_type, context_col, **kwargs\n", " )\n", " outputs = self.generate(processed_dataset[\"input\"], sampling_params)\n", "\n", " # collect multiple generations into one dataset object\n", " examples = []\n", " for i, example in enumerate(text_dataset.to_list()):\n", " for output in outputs[i].outputs:\n", " examples.append(\n", " {\"context\": example[context_col], \"prediction\": output.text.strip()}\n", " )\n", "\n", " synthetic_dataset = Dataset.from_list(examples)\n", "\n", " # filter out the examples that cannot be parsed\n", " synthetic_dataset = self._postprocess_dataset(\n", " synthetic_dataset, context_col=\"context\", **kwargs\n", " )\n", "\n", " return synthetic_dataset" ] },
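{ "cell_type": "markdown", "id": "3d4e5f6a-0003-4c7d-8e1f-2a3b4c5d6e7f", "metadata": {}, "source": [ "A minimal usage sketch for `VLLMBonito` (not executed here; the `vllm_bonito` instance and `text_ds` with its `text` column are hypothetical at this point). `SamplingParams(n=...)` controls how many candidates vLLM draws per context, and each parseable candidate becomes one row of the synthetic dataset:\n", "\n", "```python\n", "from vllm import SamplingParams\n", "\n", "sp = SamplingParams(n=2, max_tokens=256, top_p=0.95, temperature=0.7)\n", "synthetic = vllm_bonito.generate_tasks(\n", "    text_ds, context_col=\"text\", task_type=\"nli\", sampling_params=sp\n", ")\n", "```" ] },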
{ "cell_type": "markdown", "id": "5d666ab6-7722-4afd-9bde-dad5039f442e", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T07:41:42.559750Z", "iopub.status.busy": "2024-09-17T07:41:42.559531Z", "iopub.status.idle": "2024-09-17T07:41:42.563349Z", "shell.execute_reply": "2024-09-17T07:41:42.562597Z", "shell.execute_reply.started": "2024-09-17T07:41:42.559738Z" } }, "source": [ "## Synthetic Data Generation\n", "Here we load the quantized Bonito model and generate synthetic instructions for the unannotated text. The supported task types (short and full forms) are:\n", "\n", "```\n", "SHORTFORM_TO_FULL_TASK_TYPES = {\n", " \"exqa\": \"extractive question answering\",\n", " \"mcqa\": \"multiple-choice question answering\",\n", " \"qg\": \"question generation\",\n", " \"qa\": \"question answering without choices\",\n", " \"ynqa\": \"yes-no question answering\",\n", " \"coref\": \"coreference resolution\",\n", " \"paraphrase\": \"paraphrase generation\",\n", " \"paraphrase_id\": \"paraphrase identification\",\n", " \"sent_comp\": \"sentence completion\",\n", " \"sentiment\": \"sentiment\",\n", " \"summarization\": \"summarization\",\n", " \"text_gen\": \"text generation\",\n", " \"topic_class\": \"topic classification\",\n", " \"wsd\": \"word sense disambiguation\",\n", " \"te\": \"textual entailment\",\n", " \"nli\": \"natural language inference\",\n", "}\n", "```" ] }, { "cell_type": "markdown", "id": "a577b15a-fd75-48bb-8faf-bb97a424db48", "metadata": {}, "source": [ "### Generate the synthetic instructions\n", "After loading the model, we pass the unannotated paragraph and the task type to generate the instructions.\n", "Here we generate an NLI task:" ] }, { "cell_type": "code", "execution_count": 11, "id": "3704550e-e109-404c-8d0e-127bcebf28e6", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:22:59.468174Z", "iopub.status.busy": "2024-09-17T08:22:59.467863Z", "iopub.status.idle": "2024-09-17T08:22:59.472978Z", "shell.execute_reply": "2024-09-17T08:22:59.472332Z", "shell.execute_reply.started": "2024-09-17T08:22:59.468149Z" } }, "outputs": [ { "data": { "text/plain": [ "'\\n1. “Confidential Information”, whenever used in this Agreement, shall mean any data, document, specification and other information or material, that is delivered or disclosed by UNHCR to the Recipient in any form whatsoever, whether orally, visually in writing or otherwise (including computerized form), and that, at the time of disclosure to the Recipient, is designated as confidential.\\n'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## sample text\n", "unannotated_paragraph = \"\"\"\n", "1. “Confidential Information”, whenever used in this Agreement, shall mean any data, document, specification and other information or material, that is delivered or disclosed by UNHCR to the Recipient in any form whatsoever, whether orally, visually in writing or otherwise (including computerized form), and that, at the time of disclosure to the Recipient, is designated as confidential.\n", "\"\"\"\n", "unannotated_paragraph" ] }, { "cell_type": "markdown", "id": "aa1e941d-317d-415d-b2fb-5177394f68f9", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:31:33.945680Z", "iopub.status.busy": "2024-09-17T08:31:33.945338Z", "iopub.status.idle": "2024-09-17T08:31:33.949066Z", "shell.execute_reply": "2024-09-17T08:31:33.948384Z", "shell.execute_reply.started": "2024-09-17T08:31:33.945653Z" } }, "source": [ "### Quantized Bonito: AWQ Inference" ] }, { "cell_type": "code", "execution_count": null, "id": "012d7f03-3629-45a0-be38-64b90dc6ce0a", "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "model_id = 'mychen76/Llama-3.1-8B-bonito-v1-awq'\n", "bonito = AWQBonito(model_id)" ] }, { "cell_type": "markdown", "id": "4c70d39e-f790-47c3-9b2c-ed5212cc6416", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:17:55.265931Z", "iopub.status.busy": "2024-09-17T08:17:55.265618Z", "iopub.status.idle": "2024-09-17T08:17:55.269425Z", "shell.execute_reply": "2024-09-17T08:17:55.268769Z", "shell.execute_reply.started": "2024-09-17T08:17:55.265905Z" } }, "source": [ "### Generate NLI" ] }, { "cell_type": "code", "execution_count": null, "id": "ede84178-63b1-4df0-a1c5-1dfefa0a986e", "metadata": {}, "outputs": [], "source": [ "from transformers import set_seed\n", "from pprint import pprint\n", "set_seed(2)\n", "\n", "# Generate a synthetic instruction tuning pair (NLI)\n", "sampling_params = {\n", " \"max_new_tokens\": 256,\n", " \"top_p\": 0.95,\n", " \"temperature\": 0.7,\n", " \"num_return_sequences\": 1,\n", "}\n", "synthetic_dataset = bonito.generate_task(\n", " unannotated_paragraph, task_type=\"nli\", sampling_params=sampling_params\n", ")\n", "pprint(\"----Generated Instructions----\")\n", "pprint(f'Input: {synthetic_dataset[\"input\"]}')\n", "pprint(f'Output: {synthetic_dataset[\"output\"]}')" ] }, { "cell_type": "markdown", "id": "a7fcf6e8-f6cb-4ac8-b37b-f193ffd6f924", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:18:08.288818Z", "iopub.status.busy": "2024-09-17T08:18:08.288500Z", "iopub.status.idle": "2024-09-17T08:18:08.292090Z", "shell.execute_reply": "2024-09-17T08:18:08.291481Z", "shell.execute_reply.started": "2024-09-17T08:18:08.288792Z" } }, "source": [ "### Generate QA" ] }, { "cell_type": "code", "execution_count": null, "id": "ef1d3ba1-5a2f-4a7b-adce-5b030df2fb73", "metadata": {}, "outputs": [], "source": [ "from transformers import set_seed\n", "from pprint import pprint\n", "set_seed(2)\n", "\n", "# Generate a synthetic instruction tuning pair (QA)\n", "sampling_params = {\n", " \"max_new_tokens\": 256,\n", " \"top_p\": 0.95,\n", " \"temperature\": 0.7,\n", " \"num_return_sequences\": 1,\n", "}\n", "synthetic_dataset = bonito.generate_task(\n", " unannotated_paragraph, task_type=\"qa\", sampling_params=sampling_params\n", ")\n", "pprint(\"----Generated Instructions----\")\n", "pprint(f'Input: {synthetic_dataset[\"input\"]}')\n", "pprint(f'Output: {synthetic_dataset[\"output\"]}')" ] }, { "cell_type": "markdown", "id": "e9f79b37-d1bc-49bf-8d7d-fc2d088846c0", "metadata": {}, "source": [ "### vLLM Bonito: AWQ Inference\n", "Note: with the default engine settings, initialization failed with `ValueError: The model's max seq len (131072) is larger than the maximum number of tokens that can be stored in KV cache (129248). Try increasing gpu_memory_utilization or decreasing max_model_len when initializing the engine.` We follow the second suggestion and cap `max_model_len` when constructing the engine below." ] }, { "cell_type": "code", "execution_count": 1, "id": "1a3ffd80-94f0-47bc-b1fe-47444bda66b6", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:19:02.242069Z", "iopub.status.busy": "2024-09-17T08:19:02.241824Z", "iopub.status.idle": "2024-09-17T08:19:04.107622Z", "shell.execute_reply": "2024-09-17T08:19:04.107273Z", "shell.execute_reply.started": "2024-09-17T08:19:02.242049Z" } }, "outputs": [], "source": [ "from datasets import load_dataset\n", "\n", "# load dataset with unannotated text\n", "unannotated_text_ds = load_dataset(\n", " \"BatsResearch/bonito-experiment\",\n", " \"unannotated_contract_nli\"\n", ")[\"train\"].select(range(10))" ] },
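{ "cell_type": "markdown", "id": "4e5f6a7b-0004-4d8e-9f2a-3b4c5d6e7f8a", "metadata": {}, "source": [ "The next cell initializes the vLLM engine. A sketch of the configuration it uses (the argument values mirror the engine log below: `quantization=awq`, `max_seq_len=4096`, `dtype=torch.float16`; the exact code here is illustrative):\n", "\n", "```python\n", "# VLLMBonito subclasses vllm.LLM, so it accepts the usual engine arguments.\n", "vllm_bonito = VLLMBonito(\n", "    model=\"mychen76/Llama-3.1-8B-bonito-v1-awq\",\n", "    quantization=\"awq\",      # forces the AWQ kernels (see the warning in the log)\n", "    max_model_len=4096,      # avoids the KV-cache ValueError quoted above\n", "    dtype=\"float16\",\n", ")\n", "```" ] },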
{ "cell_type": "code", "execution_count": 6, "id": "98f9a917-070d-4c10-9d29-5ecce6c786bb", "metadata": { "execution": { "iopub.execute_input": "2024-09-17T08:19:22.658366Z", "iopub.status.busy": "2024-09-17T08:19:22.658098Z", "iopub.status.idle": "2024-09-17T08:19:39.785070Z", "shell.execute_reply": "2024-09-17T08:19:39.784677Z", "shell.execute_reply.started": "2024-09-17T08:19:22.658345Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO 09-17 04:19:22 awq_marlin.py:93] Detected that the model can run with awq_marlin, however you specified quantization=awq explicitly, so forcing awq. Use quantization=awq_marlin for faster inference\n", "WARNING 09-17 04:19:22 config.py:338] awq quantization is not fully optimized yet. 
The speed can be slower than non-quantized models.\n", "INFO 09-17 04:19:22 llm_engine.py:223] Initializing an LLM engine (v0.6.1.post2) with config: model='mychen76/Llama-3.1-8B-bonito-v1-awq', speculative_config=None, tokenizer='mychen76/Llama-3.1-8B-bonito-v1-awq', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=mychen76/Llama-3.1-8B-bonito-v1-awq, use_v2_block_manager=False, num_scheduler_steps=1, enable_prefix_caching=False, use_async_output_proc=True)\n", "INFO 09-17 04:19:23 model_runner.py:997] Starting to load model mychen76/Llama-3.1-8B-bonito-v1-awq...\n", "INFO 09-17 04:19:23 weight_utils.py:242] Using model weights format ['*.safetensors']\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0e53544149e94bacbc0a1c854690f32a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00