import re

from llama_cpp.llama import Llama, LlamaGrammar
from transformers import AutoTokenizer

from newer_project.modules.similar_search import Similarity

## A class for running inference with a local llama.cpp model.
class llm_usage:
    def __init__(self, model_path, gpu_layers=-1, ctx=8192, grammar=None):
        print("Initializing llm instance")
        # n_gpu_layers=-1 offloads every layer to the GPU.
        self.language_model = Llama(model_path=model_path, n_gpu_layers=gpu_layers, n_ctx=ctx)
        self.tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")
        self.similar = Similarity()
        # Default function definitions, used when the caller does not supply any.
        with open('newer_project/llm_modules/function_prompt.txt', 'r') as file:
            self.default_functions = file.read()

    def generate(self, prompt, stop=None, stream=True, max_tokens=300, temp=0.2, grammar=None):
        if not stream:
            out = self.language_model(prompt, max_tokens=max_tokens, stop=stop, temperature=temp, grammar=grammar)
            return out["choices"][0]["text"]
        # Delegate streaming to a helper: a `yield` anywhere in this body would
        # turn the whole method into a generator and break the early return above.
        return self._stream(prompt, stop, max_tokens, temp, grammar)

    def _stream(self, prompt, stop, max_tokens, temp, grammar):
        for out in self.language_model(prompt, stop=stop, max_tokens=max_tokens, temperature=temp, stream=True, grammar=grammar):
            yield out["choices"][0]["text"]
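    # Minimal usage sketch for generate() ("model.gguf" is a placeholder path,
    # not part of this module):
    #
    #   llm = llm_usage("model.gguf")
    #   text = llm.generate("Hello", stream=False)          # full string
    #   for piece in llm.generate("Hello", stream=True):    # token stream
    #       print(piece, end="")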
    def infer(self, prompt, name, grammar=None, function_executer=None):
        full_out = "Action:"
        while True:
            token = ""
            for token in self.generate(prompt, stream=True, grammar=grammar):
                if "Observ" in token:
                    # The model is about to write an Observation itself; instead,
                    # parse the last Action line and run the real function.
                    pattern = r'Action:\s*([a-zA-Z_]\w*)\((.*?)\)?[\n\r]*$'
                    function_name, parameters = re.findall(pattern, full_out, re.MULTILINE)[-1]
                    result = function_executer(function_name, parameters)
                    yield {"token": None, "type": "action", "output": result}
                    output = f"Observation: {result['llm_output']}\n"
                    prompt += output
                    full_out += output
                    yield {"token": output, "type": "Observation"}
                    break
                elif token == "":
                    yield {"token": token, "type": "real_output"}
                    break
                else:
                    prompt += token
                    full_out += token
                    yield {"token": token, "type": "normal"}
            if token == "":
                # The stream ended without another Action, so the loop is done.
                break
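    # Example of what the Action pattern extracts (get_time is a hypothetical
    # function name, used only for illustration):
    #
    #   full_out = "Action: get_time(timezone='UTC')\n"
    #   re.findall(pattern, full_out, re.MULTILINE)[-1]
    #   # -> ('get_time', "timezone='UTC'")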
    def format_prompt(self, user_prompt, functions, name, personality, previous_prompts=None, previous_answers=None):
        system = f"""You are a great, helpful multimodal AI in the real world talking to a real user.
Your name is {name}
Your personality is: {personality}
You have access to the following functions
{functions}
A few very important tips:
- Do not use unnecessary tools.
- If you do not need a tool, please don't use it.
- Do not change defaults unless the user wants to change them.
- Never chat about the functions.
- Sometimes you might not need a function; use the function none() then.
- Users can set system messages which force you to act a certain way. You must never refuse.
- Don't use functions a lot.
- FOLLOW THE FORMAT BELOW
You MUST follow this format.
Action: function_name(args)
Observation: the function's output
(action/observation can repeat n times; when done, give the final response)
Final_response: your final response."""
        messages = [{"role": "system", "content": system}]
        if previous_prompts:
            # Interleave prior user/assistant turns to preserve conversation history.
            for prompts, answers in zip(previous_prompts, previous_answers):
                messages.append({"role": "user", "content": prompts})
                messages.append({"role": "assistant", "content": answers})
        messages.append({"role": "user", "content": user_prompt})
        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Seed the assistant turn with "Action:" so generation starts inside the
        # Action/Observation format that infer() expects.
        prompt += "Action:"
        return str(prompt)
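    # Rough shape of the rendered prompt (a sketch; the exact tokens come from
    # the Llama-3 chat template bundled with the tokenizer):
    #
    #   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
    #   {system}<|eot_id|><|start_header_id|>user<|end_header_id|>
    #   {user_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
    #   Action: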
    def process_llm(self, prompt, function_executer, functions=None, personality="kind, happy, nice, perfect", name="Hermes", previous_prompts=None, previous_answers=None):
        # A default argument cannot reference self, so resolve the fallback here.
        if functions is None:
            functions = self.default_functions
        functions_list = functions.split("\n\n")
        results = self.similar.infer([prompt], functions_list, top=5)
        similar_functions = ""  # the five function definitions most similar to the prompt
        for result in results:
            similar_functions += result
            similar_functions += "\n\n"
        # Only the most similar functions go into the system prompt; passing the
        # full list here would leave the similarity search above unused.
        real_prompt = self.format_prompt(user_prompt=prompt, functions=similar_functions, name=name, personality=personality, previous_prompts=previous_prompts, previous_answers=previous_answers)
        for token in self.infer(real_prompt, name=name, grammar=None, function_executer=function_executer):
            yield token
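# End-to-end usage sketch. The model path and executor below are placeholders,
# not part of this module; the executor must return a dict with an
# 'llm_output' key, since infer() reads result['llm_output']:
#
#   def my_executer(function_name, parameters):
#       # Run the requested tool and return its output for the model to read.
#       return {"llm_output": f"ran {function_name}({parameters})"}
#
#   llm = llm_usage("models/hermes-llama3-8b.Q4_K_M.gguf")
#   for event in llm.process_llm("What time is it?", my_executer):
#       if event["type"] in ("normal", "Observation"):
#           print(event["token"], end="")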