---
license: apache-2.0
language:
- en
- ru
---

# Saiga2-13B-4bit
These are the GPTQ model files for the [saiga2-13B-lora](https://huggingface.co/IlyaGusev/saiga2_13b_lora) model.

## Technical details
The model was quantized to 4-bit with the [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) library.
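
The quantization settings themselves are not part of this repository. As a rough sketch only: a 4-bit GPTQ conversion with AutoGPTQ could look like the code below, where the source checkpoint path, the calibration text, `group_size`, and `desc_act` are illustrative assumptions rather than the settings actually used for these files.

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

# Hypothetical path: saiga2 LoRA weights already merged into the LLaMA2-13B base.
SOURCE_MODEL = "path/to/merged-saiga2-13b"

quantize_config = BaseQuantizeConfig(
    bits=4,          # 4-bit weights, as in this repository
    group_size=128,  # assumed group size (a common default)
    desc_act=False,  # assumed: no activation-order reordering
)

tokenizer = AutoTokenizer.from_pretrained(SOURCE_MODEL, use_fast=True)
model = AutoGPTQForCausalLM.from_pretrained(SOURCE_MODEL, quantize_config)

# Calibration data: a small list of tokenized texts (placeholder example).
examples = [tokenizer("Пример текста для калибровки квантизации.", return_tensors="pt")]

# Run GPTQ calibration and save the quantized weights as safetensors.
model.quantize(examples)
model.save_quantized("saiga2-13b-4bit", use_safetensors=True)
```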

## Examples of usage
First make sure you have [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) installed:

    GITHUB_ACTIONS=true pip install auto-gptq

Then try the following example code:

```python
from transformers import AutoTokenizer, GenerationConfig
from auto_gptq import AutoGPTQForCausalLM

MODEL_NAME = "gurgutan/saiga2-13b-4bit"
DEFAULT_MESSAGE_TEMPLATE = "<s>{role}\n{content}</s>\n"
# The system prompt in Russian: "You are Saiga, a Russian-language automated
# assistant. You talk to people and help them."
DEFAULT_SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."


class Conversation:
    def __init__(
        self,
        message_template=DEFAULT_MESSAGE_TEMPLATE,
        system_prompt=DEFAULT_SYSTEM_PROMPT,
        start_token_id=1,
        bot_token_id=9225
    ):
        self.message_template = message_template
        self.start_token_id = start_token_id
        self.bot_token_id = bot_token_id
        # Every conversation starts with the system message.
        self.messages = [{
            "role": "system",
            "content": system_prompt
        }]

    def get_start_token_id(self):
        return self.start_token_id

    def get_bot_token_id(self):
        return self.bot_token_id

    def add_user_message(self, message):
        self.messages.append({
            "role": "user",
            "content": message
        })

    def add_bot_message(self, message):
        self.messages.append({
            "role": "bot",
            "content": message
        })

    def get_prompt(self, tokenizer):
        # Render each message with the template, then open the bot's turn:
        # the <s> token followed by the "bot" role token.
        final_text = ""
        for message in self.messages:
            message_text = self.message_template.format(**message)
            final_text += message_text
        final_text += tokenizer.decode([self.start_token_id, self.bot_token_id])
        return final_text.strip()


def generate(model, tokenizer, prompt, generation_config):
    data = tokenizer(prompt, return_tensors="pt")
    data = {k: v.to(model.device) for k, v in data.items()}
    output_ids = model.generate(
        **data,
        generation_config=generation_config
    )[0]
    # Drop the prompt tokens and keep only the generated continuation.
    output_ids = output_ids[len(data["input_ids"][0]):]
    output = tokenizer.decode(output_ids, skip_special_tokens=True)
    return output.strip()


tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(MODEL_NAME, device="cuda:0", use_safetensors=True, use_triton=False)
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
model.eval()

# The prompt in Russian: "Compose a poem that begins with the words:
# The storm covers the sky with gloom"
inp = "Сочини стих, который начинается словами: Буря мглою небо кроет"
conversation = Conversation()
conversation.add_user_message(inp)
prompt = conversation.get_prompt(tokenizer)
output = generate(model, tokenizer, prompt, generation_config)
print(inp)
print(output)
```
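
Since `Conversation` also records bot replies via `add_bot_message`, the session above can be continued for multi-turn dialogue. Here is a minimal sketch reusing the objects from the example above; the follow-up question is illustrative:

```python
# Record the model's reply, then ask a follow-up question in the same session.
conversation.add_bot_message(output)
# The follow-up in Russian: "Continue the poem for four more lines."
conversation.add_user_message("Продолжи стихотворение ещё на четыре строки.")
prompt = conversation.get_prompt(tokenizer)
print(generate(model, tokenizer, prompt, generation_config))
```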

## Original model: [saiga2-13B-lora](https://huggingface.co/IlyaGusev/saiga2_13b_lora)
An assistant model based on LLaMA2, fine-tuned on Russian-language datasets. The model has 13 billion parameters.