How can I fine-tune Dolly v2-3b on a dataset? The initial data queries are working fine for my LLM; I now wish to fine-tune it on a custom dataset — any leads on how to proceed?
def get_similar_docs(question, similar_doc_count):
    """Return the `similar_doc_count` documents most similar to `question`.

    NOTE(review): relies on a module-level `Vector_db` vector store defined
    elsewhere in the project — confirm it is initialized before this is called.
    """
    # Fix: the pasted body had lost its indentation (SyntaxError as written).
    return Vector_db.similarity_search(question, k=similar_doc_count)
# Prompt template for the "stuff" QA chain: retrieved documents are injected
# as {context} and the user's query as {question}.
# Fix: removed the stray period after "Instruction:" in the prompt text.
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
Instruction:
Use only information in the following paragraphs to answer the question at the end.
Explain the answer with reference to the paragraphs.
{context}
Question: {question}
Response:
"""
def build_qa_chain():
    """Build a retrieval-QA ("stuff") chain backed by a quantized dolly-v2-3b pipeline.

    Returns:
        A LangChain QA chain that answers a question using only the retrieved
        context paragraphs (see the module-level `template`).
    """
    # Free any cached GPU memory before loading the 3B model.
    torch.cuda.empty_cache()
    model_name = "databricks/dolly-v2-3b"

    # Load the model in 8-bit via bitsandbytes to roughly halve GPU memory.
    # Fixes two defects in the original: `quantization_config` was created but
    # never passed anywhere, and the model was loaded a SECOND time with
    # AutoModelForCausalLM into an unused `model` variable — a wasteful
    # duplicate load of the full 3B checkpoint.
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)

    instruct_pipeline = pipeline(
        model=model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        return_full_text=True,   # include the prompt in generations, as the chain expects
        max_new_tokens=300,
        top_p=0.95,
        top_k=50,
        model_kwargs={"quantization_config": quantization_config},
    )

    prompt = PromptTemplate(input_variables=["context", "question"], template=template)
    hf_pipe = HuggingFacePipeline(pipeline=instruct_pipeline)
    # "stuff" chain: concatenate all retrieved docs into {context} in one prompt.
    return load_qa_chain(llm=hf_pipe, chain_type="stuff", prompt=prompt, verbose=True)
# Build the chain once at module load so subsequent queries reuse the loaded model.
qa_chain = build_qa_chain()