jester6136 committed · Commit 5d7c6dd · Parent(s): 1a85ace

Update README.md
README.md CHANGED
@@ -1,3 +1,26 @@
---
license: mit
---
How to use with vLLM:
```
from vllm import LLM, SamplingParams

inputs = [
    "Who is the president of the US?",
    "Can you speak Indonesian?",
]

# Initialize the LLM with the AWQ-quantized model
llm = LLM(model="jester6136/SeaLLMs-v3-1.5B-Chat-AWQ",
          quantization="AWQ",
          gpu_memory_utilization=0.9,
          max_model_len=2000,
          max_num_seqs=32)

sparams = SamplingParams(temperature=0.0, max_tokens=2000, top_p=0.95, top_k=40, repetition_penalty=1.05)

# Wrap each input in the chat template expected by the model
chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>'
prompts = [chat_template.format(input=prompt) for prompt in inputs]

outputs = llm.generate(prompts, sparams)

# Print out the model responses
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt}\nResponse: {generated_text}\n\n")
```
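As an alternative to the hard-coded template string, the prompt can also be built from the tokenizer's own chat template. This is a minimal sketch, assuming the repository's tokenizer config defines a `chat_template` (not verified here); the model name and sampling settings mirror the example above.
```
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_id = "jester6136/SeaLLMs-v3-1.5B-Chat-AWQ"

# Build the prompt from the tokenizer's chat template instead of a hand-written string.
# This assumes the repository's tokenizer config defines a chat_template.
tokenizer = AutoTokenizer.from_pretrained(model_id)
messages = [{"role": "user", "content": "Can you speak Indonesian?"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

llm = LLM(model=model_id, quantization="AWQ", max_model_len=2000)
sparams = SamplingParams(temperature=0.0, max_tokens=2000, top_p=0.95)

output = llm.generate([prompt], sparams)[0]
print(output.outputs[0].text)
```
Using the tokenizer's template keeps the prompt format in sync with whatever the model repository ships, rather than duplicating it in the calling code.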