Response
I am not able to get this work. All i am getting is empty responses i have tried playing with parameters but still no luck any advise what to modify .
app = Flask(name)
@dataclass
class GenerationConfig:
temperature: float
top_k: int
top_p: float
repetition_penalty: float
max_new_tokens: int
seed: int
reset: bool
stream: bool
threads: int
stop: list
def format_prompt(system_prompt: str, user_prompt: str):
system_prompt = f"system\n{system_prompt}\n"
user_prompt = f"user\n{user_prompt}\n"
assistant_prompt = f"assistant\n"
return f"{system_prompt}{user_prompt}{assistant_prompt}"
def generate(
llm: AutoModelForCausalLM,
generation_config: GenerationConfig,
system_prompt: str,
user_input: str,
):
# return llm(
# format_prompt(
# system_prompt,
# user_prompt,
# ),
# **asdict(generation_config),
# )
model_output = llm(
format_prompt(system_prompt, user_input.strip()),
**asdict(generation_config),
)
print("Model output:", model_output)
return model_output
@app
.route('/generate', methods=['GET','POST'])
def generate_response_endpoint():
#user_input = request.data.decode('utf-8')
# Load the model and configuration
if request.method == 'GET':
user_input = request.args.get('user_input', '') # Get input from query parameter
elif request.method == 'POST':
user_input = request.data.decode('utf-8')
print("Loading model...")
#config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
llm = AutoModelForCausalLM.from_pretrained(
"/home/azureuser/mpt-30B-inference/models/mpt-30b-chat.ggmlv0.q4_1.bin",
model_type="mpt"
)
print("model Loaded")
system_prompt = "Reply."
generation_config = GenerationConfig(
temperature=0.2,
top_k=0,
top_p=0.9,
repetition_penalty=1.0,
max_new_tokens=512,
seed=42,
reset=False,
stream=False,
threads=int(os.cpu_count() / 2), # adjust for your CPU
stop=["", "|<"],
)
generator = generate(llm, generation_config, system_prompt, user_input.strip())
#time.sleep(60)
print(generator)
response = generator
print(response)
return Response(response, content_type='text/plain; charset=utf-8')
if name == "main":
app.run(host='0.0.0.0', port=3002)