Problems with finetuning the model
Hello there,
my goal is to finetune one of the GGUF model files from this repo and end up with a GGUF file containing the finetuned model.
Question: Has anybody finetuned a model starting from a GGUF file and can point me to a tutorial on how to do this?
This is my approach so far:
For preparation, I copied "em_german_leo_mistral.Q4_K_M.gguf" as well as the "config.json" from this repo into a local folder "models/german_mistral". I also downloaded "tokenizer.json" and "tokenizer_config.json" from the original repo "https://huggingface.co/jphme/em_german_leo_mistral" into the same folder.
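As a quick sanity check (just listing my local folder; the file names are the ones mentioned above), I verify that all four files are in place:
import os
# List the local model folder to make sure all four files landed there
print(sorted(os.listdir("models/german_mistral")))
# Expected: ['config.json', 'em_german_leo_mistral.Q4_K_M.gguf', 'tokenizer.json', 'tokenizer_config.json']
After installing and importing the libraries with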
!pip install -U pip
!pip install transformers[torch]
!pip install appdirs==1.4.4
!pip install bitsandbytes==0.37.2
!pip install datasets==2.10.1
!pip install fire==0.5.0
!pip install git+https://github.com/huggingface/peft.git
#!pip install git+https://github.com/huggingface/transformers.git
!pip install torch
!pip install sentencepiece==0.1.97
!pip install tensorboardX==2.6
!pip install gradio==3.23.0
!pip install accelerate
#imports
import transformers
#import accelerate
import textwrap
from transformers import LlamaTokenizer, LlamaForCausalLM
import os
import sys
from typing import List
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
)
#import fire
import torch
from datasets import load_dataset
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE
I now want to instantiate the model and the tokenizer using
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("models/german_mistral", model_file="em_german_leo_mistral.Q4_K_M.gguf")
tokenizer.pad_token_id=tokenizer.eos_token_id
tokenizer.padding_side = "left"
model = AutoModelForCausalLM.from_pretrained("models/german_mistral", model_file="em_german_leo_mistral.Q4_K_M.gguf")
which results in the error
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
c:\Users\xxx\Repositories\finetuning\erstes_finetuning.ipynb Cell 5 line 1
----> 1 model = AutoModelForCausalLM.from_pretrained("models/german_mistral", model_file="em_german_leo_mistral.Q4_K_M.gguf")
2 #AutoModelForSeq2SeqLM
3 #model = AutoModelForSeq2SeqLM.from_pretrained("models/german_mistral", model_file="em_german_leo_mistral.Q4_K_M.gguf")
File c:\Users\xxx\Repositories\finetuning\.finetung_venv\Lib\site-packages\transformers\models\auto\auto_factory.py:566, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
564 elif type(config) in cls._model_mapping.keys():
565 model_class = _get_model_class(config, cls._model_mapping)
--> 566 return model_class.from_pretrained(
567 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
568 )
569 raise ValueError(
570 f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
571 f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
572 )
File c:\Users\xxx\Repositories\finetuning\.finetung_venv\Lib\site-packages\transformers\modeling_utils.py:2992, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
2987 raise EnvironmentError(
2988 f"Error no file named {_add_variant(SAFE_WEIGHTS_NAME, variant)} found in directory"
2989 f" {pretrained_model_name_or_path}."
2990 )
2991 else:
-> 2992 raise EnvironmentError(
...
2996 )
2997 elif os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)):
2998 archive_file = pretrained_model_name_or_path
OSError: Error no file named pytorch_model.bin, tf_model.h5, model.ckpt.index or flax_model.msgpack found in directory models/german_mistral.
I interpret this as AutoModelForCausalLM.from_pretrained() not recognizing the GGUF file as a supported weight format.
First question: Is there a way to tell the method the model type, or does transformers simply not support GGUF?
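While writing this up I found hints that newer transformers releases can dequantize GGUF files directly via a gguf_file argument. I have not been able to verify this with my installation (it apparently needs a recent transformers version plus the gguf package), but the call would presumably look like this:
from transformers import AutoModelForCausalLM, AutoTokenizer
# Assumption: a recent transformers release with GGUF support and `pip install gguf`;
# the file is dequantized to full precision on load, so this needs enough RAM.
gguf_file = "em_german_leo_mistral.Q4_K_M.gguf"
tokenizer = AutoTokenizer.from_pretrained("models/german_mistral", gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained("models/german_mistral", gguf_file=gguf_file)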
I was able to work around the loading error by switching to ctransformers with
from ctransformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("models/german_mistral")#, model_type="gpt2", hf=True)
but when I want to finetune it afterwards using
# Prompt Engineering and Tokenisation
CUTOFF_LEN = 300
def generate_prompt(data_point):
    s = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. # noqa: E501
### Instruction:
{data_point["instruction"]}
### Input:
{data_point["input"]}
### Response:
{data_point["output"]}
"""
    return s

def tokenize(prompt, add_eos_token=True):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,
        return_tensors=None,
    )
    if (
        result["input_ids"][-1] != tokenizer.eos_token_id
        and len(result["input_ids"]) < CUTOFF_LEN
        and add_eos_token
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)
    result["labels"] = result["input_ids"].copy()
    return result

def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenize(full_prompt)
    return tokenized_full_prompt
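# For context (not shown above): roughly how I apply these helpers to my data.
# "data/german_instructions.json" is only a placeholder for my local file with
# "instruction"/"input"/"output" fields.
data = load_dataset("json", data_files="data/german_instructions.json")
train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)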
# Hyperparameters for fine_tuning
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
LORA_TARGET_MODULES = [
    "q_proj",
    "v_proj",
]
BATCH_SIZE = 128
MICRO_BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
LEARNING_RATE = 3e-4
TRAIN_STEPS = 10
OUTPUT_DIR = "experiments"
# Preparing model for training
model = prepare_model_for_int8_training(model)
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=LORA_TARGET_MODULES,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
model.print_trainable_parameters()
I get the error message
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
c:\Users\xxx\Repositories\finetuning\erstes_finetuning.ipynb Cell 14 line 3
1 # Preparing model for training
----> 3 model = prepare_model_for_int8_training(model)
4 config = LoraConfig(
5 r=LORA_R,
6 lora_alpha=LORA_ALPHA,
(...)
10 task_type="CAUSAL_LM",
11 )
12 model = get_peft_model(model, config)
File c:\Users\xxx\Repositories\finetuning\.finetung_venv\Lib\site-packages\peft\utils\other.py:140, in prepare_model_for_int8_training(*args, **kwargs)
135 def prepare_model_for_int8_training(*args, **kwargs):
136 warnings.warn(
137 "prepare_model_for_int8_training is deprecated and will be removed in a future version. Use prepare_model_for_kbit_training instead.",
138 FutureWarning,
139 )
--> 140 return prepare_model_for_kbit_training(*args, **kwargs)
File c:\Users\xxx\Repositories\finetuning\.finetung_venv\Lib\site-packages\peft\utils\other.py:90, in prepare_model_for_kbit_training(model, use_gradient_checkpointing, gradient_checkpointing_kwargs)
87 if gradient_checkpointing_kwargs is None:
88 gradient_checkpointing_kwargs = {}
---> 90 for name, param in model.named_parameters():
...
319 if name.startswith("ctransformers_llm_") and hasattr(lib, name):
320 return partial(getattr(lib, name), llm)
--> 321 raise AttributeError(f"'LLM' object has no attribute '{name}'")
AttributeError: 'LLM' object has no attribute 'named_parameters'
I think this is a model format issue as well: the ctransformers LLM object is not a PyTorch module, so peft cannot iterate over its named_parameters().
Second question: Is there a way to solve this issue?
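In case fine-tuning a GGUF file directly is simply not possible, my fallback plan would be to fine-tune the original Hugging Face checkpoint (jphme/em_german_leo_mistral) with LoRA and only convert the merged result to GGUF at the end. This is an untested sketch; the llama.cpp conversion step is paraphrased from its README and the script name may differ between versions:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Fine-tune the original (non-GGUF) checkpoint with peft/LoRA as above,
# saving the adapters to OUTPUT_DIR, then merge them back into the base weights.
base = AutoModelForCausalLM.from_pretrained("jphme/em_german_leo_mistral")
tokenizer = AutoTokenizer.from_pretrained("jphme/em_german_leo_mistral")
merged = PeftModel.from_pretrained(base, OUTPUT_DIR).merge_and_unload()
merged.save_pretrained("models/german_mistral_finetuned")
tokenizer.save_pretrained("models/german_mistral_finetuned")
# Afterwards, convert the merged model to GGUF with llama.cpp, e.g.
#   python convert_hf_to_gguf.py models/german_mistral_finetuned --outfile em_german_leo_mistral_finetuned.gguf
# and optionally re-quantize it with llama.cpp's quantize tool.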