mgoin commited on
Commit
e5b9307
β€’
1 Parent(s): a2dfb19
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: Tinystories Deepsparse
3
- emoji: 📊
4
- colorFrom: purple
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 3.43.2
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: TinyStories DeepSparse
3
+ emoji: 🏢
4
+ colorFrom: indigo
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 3.38.0
8
  app_file: app.py
9
  pinned: false
10
  ---
TinyStories-1M/config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roneneldan/TinyStories-1M",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPTNeoForCausalLM"
6
+ ],
7
+ "attention_dropout": 0,
8
+ "attention_layers": [
9
+ "global",
10
+ "local",
11
+ "global",
12
+ "local",
13
+ "global",
14
+ "local",
15
+ "global",
16
+ "local"
17
+ ],
18
+ "attention_types": [
19
+ [
20
+ [
21
+ "global",
22
+ "local"
23
+ ],
24
+ 4
25
+ ]
26
+ ],
27
+ "bos_token_id": 50256,
28
+ "classifier_dropout": 0.1,
29
+ "embed_dropout": 0,
30
+ "eos_token_id": 50256,
31
+ "gradient_checkpointing": false,
32
+ "hidden_size": 64,
33
+ "initializer_range": 0.02,
34
+ "intermediate_size": null,
35
+ "layer_norm_epsilon": 1e-05,
36
+ "max_position_embeddings": 2048,
37
+ "model_type": "gpt_neo",
38
+ "num_heads": 16,
39
+ "num_layers": 8,
40
+ "resid_dropout": 0,
41
+ "summary_activation": null,
42
+ "summary_first_dropout": 0.1,
43
+ "summary_proj_to_labels": true,
44
+ "summary_type": "cls_index",
45
+ "summary_use_proj": true,
46
+ "transformers_version": "4.32.0",
47
+ "use_cache": true,
48
+ "vocab_size": 50257,
49
+ "window_size": 256
50
+ }
TinyStories-1M/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.32.0"
6
+ }
TinyStories-1M/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
TinyStories-1M/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d0b1c2abfe7f3979c12aa1c7a96223e09fefb490be6ac7b265f25cfb141c46
3
+ size 28012852
TinyStories-1M/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
TinyStories-1M/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
TinyStories-1M/tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": true,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|endoftext|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "errors": "replace",
22
+ "model_max_length": 2048,
23
+ "pad_token": null,
24
+ "tokenizer_class": "GPT2Tokenizer",
25
+ "unk_token": {
26
+ "__type": "AddedToken",
27
+ "content": "<|endoftext|>",
28
+ "lstrip": false,
29
+ "normalized": true,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
TinyStories-1M/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import deepsparse
2
+ from transformers import TextIteratorStreamer
3
+ from threading import Thread
4
+ import time
5
+ import gradio as gr
6
+ from typing import Tuple, List
7
+
8
# Log the host CPU's capabilities (e.g. AVX2 / AVX-512 / VNNI) at startup so
# deployments can verify which DeepSparse optimizations are available.
deepsparse.cpu.print_hardware_capability()

# Local directory holding the exported ONNX model and its tokenizer files.
MODEL_PATH = "TinyStories-1M"

# Markdown banner rendered at the top of the Gradio app.
DESCRIPTION = f"""
# TinyStories - DeepSparse

The model stub for this example is: {MODEL_PATH}
"""

# Upper bound for the "Max new tokens" slider; also used below as the
# pipeline's sequence length.
MAX_MAX_NEW_TOKENS = 2048
# Default number of tokens generated per request.
DEFAULT_MAX_NEW_TOKENS = 128
+
21
+
22
def clear_and_save_textbox(message: str) -> Tuple[str, str]:
    """Empty the input textbox while stashing *message* in the saved state.

    Returns:
        A pair of (new textbox value, saved message).
    """
    cleared_value = ""
    return cleared_value, message
24
+
25
+
26
def display_input(
    message: str, history: List[Tuple[str, str]]
) -> List[Tuple[str, str]]:
    """Append the user's message as a new chat turn with an empty bot reply.

    Mutates *history* in place and returns it so Gradio refreshes the chatbot.
    """
    new_turn = (message, "")
    history.append(new_turn)
    return history
31
+
32
+
33
def delete_prev_fn(history: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]], str]:
    """Drop the most recent chat turn and return (history, its user message).

    An empty history — or a turn whose message is falsy — yields "" for the
    message so it can be fed straight back into a textbox.
    """
    if history:
        last_user_msg, _bot_reply = history.pop()
    else:
        last_user_msg = ""
    return history, last_user_msg or ""
39
+
40
+
41
# Setup the engine: build the DeepSparse text-generation pipeline once at
# import time; it is shared by every request this app serves.
pipe = deepsparse.Pipeline.create(
    task="text-generation",
    model_path=MODEL_PATH,
    # Default generation length; overridden per request in generate() below.
    max_generated_tokens=DEFAULT_MAX_NEW_TOKENS,
    sequence_length=MAX_MAX_NEW_TOKENS,
)
48
+
49
+
50
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)

    # Chat transcript plus the message-entry row.
    with gr.Group():
        chatbot = gr.Chatbot(label="Chatbot")
        with gr.Row():
            textbox = gr.Textbox(
                container=False,
                show_label=False,
                placeholder="Type a message...",
                scale=10,
            )
            submit_button = gr.Button("Submit", variant="primary", scale=1, min_width=0)

    with gr.Row():
        retry_button = gr.Button("πŸ”„ Retry", variant="secondary")
        undo_button = gr.Button("↩️ Undo", variant="secondary")
        clear_button = gr.Button("πŸ—‘οΈ Clear", variant="secondary")

    # Holds the most recently submitted message so retry/undo can restore it.
    saved_input = gr.State()

    gr.Examples(
        examples=["Once upon a time"],
        inputs=[textbox],
    )

    # Generation controls exposed to the user.
    max_new_tokens = gr.Slider(
        label="Max new tokens",
        minimum=1,
        maximum=MAX_MAX_NEW_TOKENS,
        step=1,
        value=DEFAULT_MAX_NEW_TOKENS,
    )
    temperature = gr.Slider(
        label="Temperature",
        minimum=0.1,
        maximum=4.0,
        step=0.1,
        value=1.0,
    )

    # Generation inference
    def generate(message, history, max_new_tokens: int, temperature: float):
        """Stream tokens from the shared pipeline into the last chat turn.

        Yields the updated *history* after every streamed fragment so the
        chatbot component refreshes incrementally.
        """
        streamer = TextIteratorStreamer(pipe.tokenizer)
        # NOTE(review): mutating the shared pipeline per request is racy if two
        # generations ever run concurrently — confirm the queue serializes them.
        pipe.max_generated_tokens = max_new_tokens
        pipe.sampling_temperature = temperature
        generation_kwargs = dict(sequences=message, streamer=streamer)
        # Run generation on a worker thread; this generator consumes the streamer.
        thread = Thread(target=pipe, kwargs=generation_kwargs)
        thread.start()
        for new_text in streamer:
            # assumes Gradio delivers history entries as mutable [user, bot]
            # lists — TODO confirm (display_input appends tuples).
            history[-1][1] += new_text
            yield history
        thread.join()
        # Dump per-stage timing stats to the server log.
        print(pipe.timer_manager)

    # Hooking up all the buttons
    # Enter key: save+clear the input, echo it into the chat, then generate.
    textbox.submit(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).success(
        generate,
        inputs=[saved_input, chatbot, max_new_tokens, temperature],
        outputs=[chatbot],
        api_name=False,
    )

    # Submit button: identical chain to pressing Enter in the textbox.
    submit_button.click(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).success(
        generate,
        inputs=[saved_input, chatbot, max_new_tokens, temperature],
        outputs=[chatbot],
        api_name=False,
    )

    # Retry: drop the last turn, re-display its message, and regenerate.
    retry_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        generate,
        inputs=[saved_input, chatbot, max_new_tokens, temperature],
        outputs=[chatbot],
        api_name=False,
    )

    # Undo: remove the last turn and put its message back into the textbox.
    undo_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=lambda x: x,
        inputs=[saved_input],
        outputs=textbox,
        api_name=False,
        queue=False,
    )

    # Clear: wipe both the transcript and the saved message.
    clear_button.click(
        fn=lambda: ([], ""),
        outputs=[chatbot, saved_input],
        queue=False,
        api_name=False,
    )

# Enable queuing (required for streaming generators), then start the server.
demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ deepsparse-nightly
2
+ transformers
3
+ gradio
style.css ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Center the page title rendered from the Markdown banner. */
h1 {
    text-align: center;
}

/* Styling for a "Duplicate Space" button, if one is added to the UI. */
#duplicate-button {
    margin: auto;
    color: white;
    background: #1565c0;
    border-radius: 100vh;
}

/* Constrain and center the top-level Gradio container. */
#component-0 {
    max-width: 900px;
    margin: auto;
    padding-top: 1.5rem;
}