Update README.md
README.md
---
datasets:
- IlyaGusev/ru_turbo_alpaca_evol_instruct
- IlyaGusev/ru_turbo_alpaca
- IlyaGusev/ru_turbo_saiga
- RussianNLP/russian_super_glue
language:
- ru
pipeline_tag: question-answering
---

The model was trained with LoRA on parts of the following datasets:

- *IlyaGusev/gazeta*
- *IlyaGusev/ru_turbo_alpaca_evol_instruct*
- *IlyaGusev/ru_turbo_alpaca*
- *IlyaGusev/ru_turbo_saiga*
- *RussianNLP/russian_super_glue (muserc)*
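
The card does not state the adapter hyperparameters. Purely as a hedged sketch of what a LoRA setup with `peft` can look like (the rank, alpha, dropout, and target modules below are assumptions, not the values used for this model):

```python
from peft import LoraConfig, get_peft_model

# Hypothetical configuration: r, lora_alpha, lora_dropout, and target_modules
# are illustrative guesses, not the settings actually used to train this model.
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
# get_peft_model wraps an already-loaded base model with trainable LoRA adapters:
# model = get_peft_model(base_model, lora_config)
```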

#### Base model: NousResearch/Yarn-Llama-2-7b-64k

#### Requires CUDA > 11.4

### GPU: A100
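
Before installing, it may be worth verifying these requirements from the environment (a minimal sanity check, not part of the original card):

```python
import torch

# flash-attn needs a recent CUDA toolkit; the card recommends an A100 GPU
print(torch.version.cuda)             # expect a version above 11.4
print(torch.cuda.is_available())      # expect True
print(torch.cuda.get_device_name(0))  # e.g. "NVIDIA A100-SXM4-80GB"
```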
Install the dependencies:

```python
!pip install peft
!pip install flash-attn --no-build-isolation
!pip install git+https://github.com/HazyResearch/flash-attention.git#subdirectory=csrc/rotary
```
Load the model and tokenizer:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# trust_remote_code is required for the Yarn 64K-context attention implementation
model = AutoModelForCausalLM.from_pretrained(
    'geldarr/saiga-Yarn-Llama-2-7b-64k',
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map={'': 0}  # place the whole model on GPU 0
)
tokenizer = AutoTokenizer.from_pretrained('geldarr/saiga-Yarn-Llama-2-7b-64k', use_fast=False)
```
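
The snippet above loads the repository weights directly. If the repository instead shipped a standalone LoRA adapter, `peft` (installed above) could attach it to the base model; this alternative is an assumption, not something the card prescribes:

```python
from peft import PeftModel

# Assumption: only applicable if the repo contains a LoRA adapter rather than
# merged weights. Load the base model first, then attach the adapter.
base_model = AutoModelForCausalLM.from_pretrained(
    'NousResearch/Yarn-Llama-2-7b-64k',
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map={'': 0},
)
model = PeftModel.from_pretrained(base_model, 'geldarr/saiga-Yarn-Llama-2-7b-64k')
```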
|
53 |
+
|
54 |
+
```python
|
55 |
+
big_prompts = '''<s>system\nТы — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им.</s>\n
|
56 |
+
<s>user
|
57 |
+
Дай ответы на вопрос основываясь только на тексте ниже:\n
|
58 |
+
вопрос?
|
59 |
+
|
60 |
+
Текст <65536 tokens
|
61 |
+
|
62 |
+
</s>
|
63 |
+
<s>bot
|
64 |
+
'''
|
65 |
+
```python
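
The template above uses literal placeholders: `вопрос?` ("question?") stands for the user's question and `Текст <65536 tokens` for the source document of up to 65,536 tokens. A small helper that fills the template might look like this (`build_prompt` is a hypothetical sketch, not part of the original card):

```python
# Hypothetical helper; the tag layout copies the template above.
def build_prompt(question: str, document: str) -> str:
    system = ("Ты — Сайга, русскоязычный автоматический ассистент. "
              "Ты разговариваешь с людьми и помогаешь им.")
    return (
        f"<s>system\n{system}</s>\n"
        f"<s>user\nДай ответы на вопрос, основываясь только на тексте ниже:\n"
        f"{question}\n\n{document}\n</s>\n"
        f"<s>bot\n"
    )

# my_question and my_document are user-supplied strings (document up to ~64K tokens)
big_prompts = build_prompt(my_question, my_document)
```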

Set up the generation parameters:

```python
gen_config = {
    "pad_token_id": 0,
    "bos_token_id": 1,
    "eos_token_id": 2,
    "temperature": 0.4,          # fairly low temperature for grounded answers
    "top_p": 0.9,
    "top_k": 50,
    "do_sample": True,
    "max_new_tokens": 15360,
    "repetition_penalty": 1.1,
    "no_repeat_ngram_size": 15,
}
generation_config = GenerationConfig.from_dict(gen_config)
```

Generate an answer:

```python
def generate(model, tokenizer, prompt, generation_config):
    # Tokenize the prompt and move the tensors to the model's device
    data = tokenizer(prompt, return_tensors="pt")
    data = {k: v.to(model.device) for k, v in data.items()}
    output_ids = model.generate(
        **data,
        generation_config=generation_config
    )[0]
    # Drop the prompt tokens, keeping only the newly generated continuation
    output_ids = output_ids[len(data["input_ids"][0]):]
    output = tokenizer.decode(output_ids)
    return output.strip()

output = generate(model, tokenizer, big_prompts, generation_config)
print(output)
```