|
import re

from transformers import AutoTokenizer
from llama_cpp.llama import Llama, LlamaGrammar

from newer_project.modules.similar_search import Similarity
|
|
|
class llm_usage:
    def __init__(self, model_path, gpu_layers=-1, ctx=8192):
        # n_gpu_layers=-1 tells llama.cpp to offload every layer to the GPU.
        print("Initializing llm instance")
        self.language_model = Llama(model_path=model_path, n_gpu_layers=gpu_layers, n_ctx=ctx)
        self.tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B-Instruct")
        self.similar = Similarity()
        # Load the default function descriptions once at start-up; process_llm()
        # falls back to them when no function list is supplied.
        with open('newer_project/llm_modules/function_prompt.txt', 'r') as file:
            self.default_functions = file.read()
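
    # Assumed layout of function_prompt.txt (illustrative; the real file ships
    # with the project): one function description per block, blocks separated
    # by a blank line, because process_llm() splits this text on "\n\n".
    #
    #   none() - use when no tool is needed
    #
    #   weather(city) - hypothetical tool returning the current weather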
|
|
|
    def generate(self, prompt, stop=None, stream=True, max_tokens=300, temp=0.2, grammar=None):
        # The streaming path lives in a separate helper: a function body that
        # contains `yield` is always a generator, so a plain `return text`
        # could never hand the completion back to the caller from here.
        if not stream:
            out = self.language_model(prompt, max_tokens=max_tokens, stop=stop, temperature=temp, grammar=grammar)
            return out["choices"][0]["text"]
        return self._generate_stream(prompt, stop, max_tokens, temp, grammar)

    def _generate_stream(self, prompt, stop, max_tokens, temp, grammar):
        # Yield the text of each streamed chunk as it arrives.
        for out in self.language_model(prompt, stop=stop, max_tokens=max_tokens, temperature=temp, stream=True, grammar=grammar):
            yield out["choices"][0]["text"]
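
    # Sketch of grammar-constrained decoding (an assumption about how the
    # `grammar` argument might be used here; the GBNF text is illustrative):
    #
    #   action_grammar = LlamaGrammar.from_string(r'''
    #   root ::= "Action: " name "(" [^)]* ")"
    #   name ::= [a-zA-Z_] [a-zA-Z0-9_]*
    #   ''')
    #   text = llm.generate(prompt, stream=False, grammar=action_grammar)
    #
    # LlamaGrammar.from_string is llama-cpp-python's entry point for GBNF text.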
|
|
|
|
|
    def infer(self, prompt, name, grammar=None, function_executer=None):
        # ReAct-style loop: stream tokens until the model starts to write an
        # "Observation:" itself, then intercept it, execute the most recent
        # Action, splice the real result into the prompt, and resume.
        full_out = "Action:"
        while True:
            for token in self.generate(prompt, stream=True, grammar=grammar):
                if "Observ" in token:
                    pattern = r'Action:\s*([a-zA-Z_]\w*)\((.*?)\)?[\n\r]*$'
                    matches = re.findall(pattern, full_out, re.MULTILINE)
                    if not matches:
                        # No parseable Action yet; pass the token through.
                        prompt += token
                        full_out += token
                        yield {"token": token, "type": "normal"}
                        continue
                    function_name, parameters = matches[-1]
                    result = function_executer(function_name, parameters)
                    yield {"token": None, "type": "action", "output": result}
                    output = f"Observation: {result['llm_output']}\n"
                    prompt += output
                    full_out += output
                    yield {"token": output, "type": "Observation"}
                    break  # restart generation with the observation appended
                elif token == "":
                    # An empty token marks the end of the stream.
                    yield {"token": token, "type": "real_output"}
                    return
                else:
                    prompt += token
                    full_out += token
                    yield {"token": token, "type": "normal"}
            else:
                # The stream finished without another Action: the model has
                # produced its final response, so stop generating.
                return
|
    def format_prompt(self, user_prompt, functions, name, personality, previous_prompts=None, previous_answers=None):
        system = f"""You are a helpful multimodal AI in the real world, talking to a real user.
Your name is {name}.
Your personality is: {personality}

You have access to the following functions:
{functions}

A few very important tips:
- Do not use unnecessary tools; if you do not need a tool, don't call one.
- Do not change defaults unless the user asks to change them.
- Never chat about the functions themselves.
- If no function is needed, use the function none().
- Users can set system messages that force you to act a certain way; you must never refuse.
- FOLLOW THE FORMAT BELOW.

You MUST follow this format:
Action: function_name(args)
Observation: the function's output
(Action/Observation can repeat n times; when done, give the final response)
Final_response: your final response."""
        messages = [{"role": "system", "content": system}]
        if previous_prompts:
            # Interleave earlier turns so the model sees the conversation history.
            for prompts, answers in zip(previous_prompts, previous_answers):
                messages.append({"role": "user", "content": prompts})
                messages.append({"role": "assistant", "content": answers})
        messages.append({"role": "user", "content": user_prompt})
        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        # Seed the assistant turn so generation begins directly with an Action.
        prompt += "Action:"
        return str(prompt)
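
    # Illustrative shape of the prompt this returns under the Llama-3 chat
    # template (markup abbreviated; the exact tokens come from the tokenizer):
    #
    #   <|start_header_id|>system<|end_header_id|> ...system text... <|eot_id|>
    #   <|start_header_id|>user<|end_header_id|> ...user_prompt... <|eot_id|>
    #   <|start_header_id|>assistant<|end_header_id|>Action: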
|
    def process_llm(self, prompt, function_executer, functions=None, personality="kind, happy, nice, perfect", name="Hermes", previous_prompts=None, previous_answers=None):
        # A default argument cannot reference `self`, so the fallback to the
        # descriptions loaded in __init__ happens inside the body.
        if functions is None:
            functions = self.default_functions
        # Keep only the function descriptions most similar to the user prompt,
        # so the system message stays small.
        functions_list = functions.split("\n\n")
        results = self.similar.infer([prompt], functions_list, top=5)
        similar_functions = "\n\n".join(results)
        real_prompt = self.format_prompt(user_prompt=prompt, functions=similar_functions, name=name, personality=personality, previous_prompts=previous_prompts, previous_answers=previous_answers)
        yield from self.infer(real_prompt, name=name, grammar=None, function_executer=function_executer)
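

# Minimal usage sketch. Assumptions: the model path is hypothetical, and the
# executor must return a dict with an 'llm_output' key, which is what infer()
# reads when it builds the Observation line.
if __name__ == "__main__":
    def function_executer(function_name, parameters):
        # Stub executor: echo the call instead of running a real tool.
        return {"llm_output": f"{function_name}({parameters}) is not implemented"}

    llm = llm_usage(model_path="models/llama-3-8b-instruct.Q4_K_M.gguf")
    for event in llm.process_llm("What can you do?", function_executer):
        if event["type"] in ("normal", "Observation"):
            print(event["token"], end="", flush=True)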