Update README.md
README.md
CHANGED
@@ -1,10 +1,8 @@
 ---
-license:
+license: apache-2.0
 datasets:
--
--
-- anon8231489123/ShareGPT_Vicuna_unfiltered
-- NeelNanda/pile-10k
+- OpenAssistant/oasst1
+- EleutherAI/pile
 language:
 - en
 - es
@@ -15,6 +13,8 @@ metrics:
 - accuracy
 - bleu
 pipeline_tag: text-generation
+tags:
+- code
 ---
 
 
@@ -23,11 +23,9 @@ this model uses Task classification and the conversation is between USER and Ans
 # NOTE ⚠️
 
 
-THE JAX/FLAX version of model
+THE JAX/FLAX version of model is available both for training and usage
 
 
-This model is a finetuned version of Kolla with LGeM data With Respect to them and changes some data and optimizers
-The model includes pre-trained Weights so it is GNU v3.0 licensed as the same as Original Llama Model
 
 # Using Model in Huggingface Transformers
 
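The usage section that the next hunk edits loads the checkpoint with `LlamaForCausalLM` in float16. A minimal, self-contained sketch of that loading step; the repo id and the `device_map` argument are illustrative assumptions, not taken from the commit:

```python
# Minimal sketch of the loading step shown in the diff context below.
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_id = "erfanzar/LGeM-7B"  # placeholder repo id, not confirmed by this commit

tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # matches the torch_dtype kept as context below
    device_map="auto",          # assumption: accelerate-style device placement
)
```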
@@ -62,48 +60,12 @@ model = LlamaForCausalLM.from_pretrained(
     torch_dtype=torch.float16
 )
 
-def generator(input_text,pipe_line,task='CONVERSATION',max_number=256,do_print=False,args_a=False):
+def generator(input_text,pipe_line,max_number=256,do_print=False ,args_a=False):
     verify_text = lambda txt : '\n'.join([textwrap.fill(txt, width=140) for txt in txt.split('\n')])
-
-    if code_es:
-        for code_e in code_es:
-            code = base64.b64decode(code_e).decode('utf-8')
-            regex = r"\b{}\b".format(re.escape(code))
-            encoded_word = base64.b64encode(code.encode('utf-8')).decode('utf-8')
-            text = re.sub(regex, encoded_word, text, flags=re.IGNORECASE)
-    pattern = r"\b" + re.escape(base64.b64decode('VUMgQmVya2VsZXk=').decode('utf-8')) + r"\b"
-    replacement = base64.b64decode('QUkgT3BlblNvdXJjZSBDb21tdW5pdHk=').decode('utf-8')
-    text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
-    encoded_text = base64.b64encode(text.encode('utf-8')).decode('utf-8')
-    block_size = 10
-    def is_text_safe(text):
-        """
-        This function checks if the input text is safe by matching it against a regular expression pattern
-        that looks for potentially unsafe characters or patterns.
-        Returns True if the text is safe, and False otherwise.
-        """
-        unsafe_pattern = r"[^\w\s\.\-\@]"
-        match_ae = re.search(unsafe_pattern, text)
-        if match_ae:
-            return False
-        else:
-            return True
-    if safty_checker:
-        res = is_text_safe(text)
-    blocks = [encoded_text[i:i+block_size] for i in range(0, len(encoded_text), block_size)]
-    import random
-    random.shuffle(blocks)
-    cka.append(blocks)
-    return text if not req else (text,blocks)
-    else:
-        return text
-    if not task in ['CONVERSATION', 'Q&A', 'INFO', 'EXPLAIN']:
-        raise ValueError(f"{task} is not available current tasks are => ['CONVERSATION', 'Q&A', 'INFO', 'EXPLAIN']")
+
     orginal_text = input_text
     if not input_text.startswith(f'{task}: USER:') and args_a:
-        input_text = f'{task}: USER: {input_text}'
-        if not input_text.endswith('\n\nAI:'):
-            input_text += '\n\nAI:'
+        input_text = f'<\s><|prompter|> {input_text}<\s><|ai|>'
     for i in range(max_number):
         exac = input_text
         with torch.no_grad():
@@ -113,9 +75,9 @@ def generator(input_text,pipe_line,task='CONVERSATION',max_number=256,do_print=False,args_a=False):
             clear_output(wait=True)
             print(verify_text(input_text))
 
-        if input_text.endswith('\n\nAI:') or exac == input_text:
+        if input_text.endswith('<\s>') and i>6 or exac == input_text or input_text.endswith('<|prompter|>') and i>6:
             break
-        yield
+        yield verify_text(input_text)
 
 ```
 
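The added lines switch the prompt to OpenAssistant-style `<|prompter|>`/`<|ai|>` tokens and stop generation once a stop token appears or the text stops growing. A minimal sketch of one turn under that format, assuming `<\s>` in the diff is meant to be the end-of-sequence token `</s>` and using `model.generate` directly rather than the README's pipeline wrapper:

```python
# Sketch of a single turn with the new prompter/ai format (assumptions noted).
import torch

def build_prompt(user_text: str) -> str:
    # The diff writes '<\s>'; we assume the intended token is '</s>'.
    return f"</s><|prompter|> {user_text}</s><|ai|>"

@torch.no_grad()
def generate_turn(model, tokenizer, user_text: str, max_new_tokens: int = 256) -> str:
    prompt = build_prompt(user_text)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    text = tokenizer.decode(output[0], skip_special_tokens=False)
    # Cutting at the next prompter token mirrors the endswith() checks
    # in the diff, without streaming token by token.
    return text.split("<|prompter|>")[0]
```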
@@ -136,17 +98,6 @@ pipe_line = pipeline(
     output_scores=True
 
 )
-cache = ''
-cache_step = 0
-while True:
-    input_ = cache+'\nUSER: '+input('>> ') if cache_step !=0 else input('>> ')
-    for i,t in enumerate(generator(input_,pipe_line=pipe_line,max_number=1024,args_a=False if cache_step != 0 else True)):
-        clear_output(wait=True)
-        print((f"{i} :\n {t}")[-3000:])
-        ou_t = t
-    cache += ou_t[len(cache):]
-    cache_step+=1
-
 ```
 or Just Simply Open [GOOGLE COLAB 🚀🚀](https://colab.research.google.com/drive/1nWS_FhWIDH3-g56F3FbWCIYi0ngVdWHx?usp=sharing)
 
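With the interactive `cache`/`while True` loop deleted, the `pipeline` object now only feeds the `generator` helper. A single-turn sketch of that call; the `pipeline` construction arguments are abbreviated here and the task string is an assumption. Note that the new `generator` signature drops the `task` parameter while the `startswith` check still references `{task}`, so a module-level `task` string would need to exist for this to run:

```python
# Single-turn call into the generator defined earlier in this README.
from transformers import pipeline

task = "CONVERSATION"  # assumption: still referenced inside generator

pipe_line = pipeline(
    "text-generation",  # assumed task string; the diff elides these arguments
    model=model,
    tokenizer=tokenizer,
)

last = None
for step_text in generator("Hello, who are you?", pipe_line=pipe_line,
                           max_number=1024, args_a=True):
    last = step_text    # each yield is the running, line-wrapped transcript

print(last)
```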
@@ -206,17 +157,20 @@ if __name__ == "__main__":
 - you can simply import models like
 
 ```python
+# Pytorch
 from modules import LGeMForCausalLM
+# Jax
+from modules import FlaxLGeMForCausalLM
 ```
 
-- and Training code is available at
+- and Training code is available at jax_train.py (check source)
 - training parameters
-- - learning rate
-- -
-- - batch
-- -
-- - Train Time
-- - budget
+- - learning rate 5e-5
+- - Optimizer LION
+- - batch 32
+- - TPU POD
+- - Train Time 50 hours
+- - budget 500 $
 ``` shell
 python3 LGeM-train.py
 ```
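The stated recipe (LION optimizer, learning rate 5e-5, batch 32) maps directly onto `optax.lion` in JAX, which fits the Flax model class imported above. A minimal sketch of that optimizer configuration; the warmup schedule, gradient clipping, and their values are illustrative assumptions, not from the commit:

```python
# Sketch of the stated training configuration with optax's Lion optimizer.
# lr 5e-5 and batch 32 come from the README; the schedule is an assumption.
import optax

learning_rate = 5e-5
batch_size = 32

# Constant schedule with a short linear warmup (warmup length is illustrative).
schedule = optax.join_schedules(
    schedules=[
        optax.linear_schedule(init_value=0.0, end_value=learning_rate,
                              transition_steps=100),
        optax.constant_schedule(learning_rate),
    ],
    boundaries=[100],
)

optimizer = optax.chain(
    optax.clip_by_global_norm(1.0),  # common stabilizer, an assumption here
    optax.lion(learning_rate=schedule),
)
```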