File size: 840 Bytes
5d7c6dd
 
 
 
 
 
 
 
 
 
 
b28c9f1
5d7c6dd
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
---
license: mit
---
How to use with vLLM:
```python
from vllm import LLM, SamplingParams
# Plain user questions; each one is wrapped in the chat markup further down.
inputs = [
    "Who is the president of US?",
    "Can you speak Indonesian?",
]

# Initialize the LLM from the AWQ-quantized checkpoint.
llm = LLM(
    model="jester6136/Phi-3.5-mini-instruct-awq",
    quantization="AWQ",
    gpu_memory_utilization=0.9,
    max_model_len=2000,
    max_num_seqs=32,
)

# Sampling settings shared by every prompt in the batch.
sparams = SamplingParams(
    temperature=0.0,
    max_tokens=2000,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.05,
)

# Single-turn prompt in the Phi-3.5 chat format: the user text is followed
# directly by <|end|> (no padding space), and the generation prompt is
# "<|assistant|>" plus a newline, as in the model's own chat template.
chat_template = "<|user|>\n{input}<|end|>\n<|assistant|>\n"
prompts = [chat_template.format(input=question) for question in inputs]

# One batched call; the loop below reads one result per prompt.
outputs = llm.generate(prompts, sparams)

# Print each formatted prompt together with its first completion.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt}\nResponse: {generated_text}\n\n")
```