helenai committed
Commit a873875
1 Parent(s): 7927870
Files changed (1):
  app.py (+47 -12)
app.py CHANGED
@@ -1,9 +1,13 @@
+import pprint
+import subprocess
 from threading import Thread
 
-import torch
 import gradio as gr
-from transformers import AutoTokenizer, TextIteratorStreamer
 from optimum.intel.openvino import OVModelForSeq2SeqLM
+from transformers import AutoTokenizer, TextIteratorStreamer
+
+result = subprocess.run(["lscpu"], text=True, capture_output=True)
+pprint.pprint(result.stdout)
 
 original_model_id = "declare-lab/flan-alpaca-xl"
 original_model_id = "declare-lab/flan-alpaca-large"
@@ -12,13 +16,16 @@ model_id = f"helenai/{original_model_id.replace('/','-')}-ov"
 model = OVModelForSeq2SeqLM.from_pretrained(model_id)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
+
 def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
     # Get the model and tokenizer, and tokenize the user text.
     model_inputs = tokenizer([user_text], return_tensors="pt")
 
     # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
     # in the main thread. Adds timeout to the streamer to handle exceptions in the generation thread.
-    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
+    streamer = TextIteratorStreamer(
+        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
+    )
     generate_kwargs = dict(
         model_inputs,
         streamer=streamer,
@@ -26,7 +33,7 @@ def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
         do_sample=True,
         top_p=top_p,
         temperature=float(temperature),
-        top_k=top_k
+        top_k=top_k,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
@@ -40,7 +47,7 @@ def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
 
 
 def reset_textbox():
-    return gr.update(value='')
+    return gr.update(value="")
 
 
 with gr.Blocks() as demo:
@@ -59,26 +66,54 @@ with gr.Blocks() as demo:
         with gr.Column(scale=4):
             user_text = gr.Textbox(
                 placeholder="Write an email about an alpaca that likes flan",
-                label="User input"
+                label="User input",
             )
             model_output = gr.Textbox(label="Model output", lines=10, interactive=False)
             button_submit = gr.Button(value="Submit")
 
         with gr.Column(scale=1):
             max_new_tokens = gr.Slider(
-                minimum=1, maximum=1000, value=250, step=1, interactive=True, label="Max New Tokens",
+                minimum=1,
+                maximum=1000,
+                value=250,
+                step=1,
+                interactive=True,
+                label="Max New Tokens",
             )
             top_p = gr.Slider(
-                minimum=0.05, maximum=1.0, value=0.95, step=0.05, interactive=True, label="Top-p (nucleus sampling)",
+                minimum=0.05,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                interactive=True,
+                label="Top-p (nucleus sampling)",
            )
             top_k = gr.Slider(
-                minimum=1, maximum=50, value=50, step=1, interactive=True, label="Top-k",
+                minimum=1,
+                maximum=50,
+                value=50,
+                step=1,
+                interactive=True,
+                label="Top-k",
            )
             temperature = gr.Slider(
-                minimum=0.1, maximum=5.0, value=0.8, step=0.1, interactive=True, label="Temperature",
+                minimum=0.1,
+                maximum=5.0,
+                value=0.8,
+                step=0.1,
+                interactive=True,
+                label="Temperature",
            )
 
-    user_text.submit(run_generation, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)
-    button_submit.click(run_generation, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)
+    user_text.submit(
+        run_generation,
+        [user_text, top_p, temperature, top_k, max_new_tokens],
+        model_output,
+    )
+    button_submit.click(
+        run_generation,
+        [user_text, top_p, temperature, top_k, max_new_tokens],
+        model_output,
+    )
 
     demo.queue(max_size=32).launch(enable_queue=True, server_name="0.0.0.0")
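
Note: the hunks above cut off before the rest of run_generation, where (per the comment in the code) the main thread pulls text from the streamer. For context, a minimal sketch of that standard TextIteratorStreamer consumption pattern; the loop and the model_output accumulator are illustrative, not lines from this commit:

    # Illustrative sketch, not part of this diff: the usual way the main
    # thread drains a TextIteratorStreamer while model.generate() runs on
    # the worker thread started above. Iterating the streamer blocks (up
    # to its 10-second timeout) until newly decoded text arrives.
    model_output = ""
    for new_text in streamer:
        model_output += new_text
        yield model_output  # Gradio renders each partial string as it arrives

With run_generation written as a generator, the queue enabled at launch lets Gradio stream these partial strings into the "Model output" textbox instead of waiting for generation to finish.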